Index: dspam/CHANGE.sdg diff -c /dev/null dspam/CHANGE.sdg:1.16.2.1 *** /dev/null Fri Jan 30 14:25:10 2004 --- dspam/CHANGE.sdg Fri Jan 30 13:33:03 2004 *************** *** 0 **** --- 1,147 ---- + Release 2.6.5.2-5 + ------------------- + Fix lock.c. Remove python subpackage - moved to pydspam. + Add missing headers for dspam-devel needed to compile pydspam package. + + Release 2.6.5.2-4 + ------------------- + + [20031022.0000] RPM Administrivia + + Change CGI to reference smart alias. Create falsepositive script + for CGI. Script can be modified to work for milter or dspam LDA or both. + Create hourly cron script to process lock timeouts for milter. + + [20031021.0000] SIGNATURE CORPUS bug + + Count signature spam corpus as miss. Check in testlibdspam. + Use pydspam-1.1.3. + RPM for dspam-python includes hourly cron to process lock timeouts. + + Release 2.6.5.2-2 + ----------------- + + [20030910.0002] OPT_IN support + + provide both dspam.optin and dspam.optout. Link dspam to dspam.optout + by default. + + [20030910.0001] testlibdspam + + Check dspam_process return code and free ctx->message. + Work around AIX RAND_MAX bug. + + [20030910.0000] Fix memory leaks in libdspam + + decode.c not destroying boundaries + lht.c not freeing token_name + libdspam.c not destroying header,body when calling process_signature + + Release 2.6.5.2-1 + ----------------- + + [20030906.0000] Merge dspam-2.6.5.2 + + Release 2.6.5-2 + --------------- + + [20030905.0000] Stack overflow on AIX + + tbt.c used recursion in tbt_sort and tbt_destroy. Modified tbt.c + to avoid recursion by adding a parent pointer to tree. Also eliminated + tbt_sort with its intermediate nodetree and provided an iterator instead. + + Release 2.6.5-1 + --------------- + + [20030902.0001] Fix bugs, pass unit tests + + CTX->message never destroyed + + [20030902.0000] Merge changes from 2.6.5 + + CTX->copyback removed. 
Include pydspam-1.1.1 + + Release 2.6.4.01-1 + ------------------ + + [20030818.0000] Merge changes from 2.6.4.01 + + Include empty patch from Andrew W. Nosenko + + Release 2.6.4.01-1 + ------------------ + + [20030818.0000] Include optional smart spam alias in RPM + + [20030814.0000] Merge changes from 2.6.4 + + [20030808.0000] Merge changes from 2.6.4.b2 + + Release 2.6.3-2 + --------------- + + [20030804.0000] Install CGI script from RPM + + Creates a 'dspam' user that is also a member of the 'mail' group. + Installs dspam.cgi and its components in a directory /var/www/cgi-bin/dspam. + Creates a shell stub, /var/www/cgi-bin/dspam.cgi, that suexecs dspam/dspam.cgi. + + All end user needs to add is authorization config, for example AuthType and + AuthUserFile to /var/www/cgi-bin/dspam.cgi. + + Should RPM provide a default config that can be Included in httpd.conf? + + Release 2.6.3-1 + --------------- + + [20030801.0000] Merge changes from network dweebs + + Release 2.6.2.02-2 + ------------------ + + [20030730.0001] Fix space printing loop with long filename in dspam_stats.c + + Release 2.6.2.02-1 + ------------------ + + [20030730.0000] Move python support to pydspam package + + Install python utils with pydspam prefix in dspam-python package. + + [20030729.0000] Merge dspam-2.6.2.02 + + Did not keep _ds_process_*_token entry points because _ds_tokenize does not + take a CTX. The only thing it cares about is the CHAINED flag. I can't really + see either _ds_tokenize or _ds_process_*_token as member functions of a CTX. + They take a message buffer or token and produce or update a token table. + The process_*_token APIs are trivial to add if anyone uses them. So far, + I am the only consumer of my tokenize patch. + + Release 2.6.2-3 + ------------------ + [20030711.0000] Unit test for DSM_PROCESS + DSF_CORPUS with spamlike msg + + [20030709.0000] Unit test for empty body with DSF_IGNOREHEADER + + Empty body with DSF_IGNOREHEADER crashes 2.6.2. 
Fixed with reorganized + _ds_operate(). + + [20030709.0000] Simplify stat update, add unit tests + + _ds_operate() reorganized. + + [20030703.0000] Add DSF_SIGNATURE + DSF_CORPUS behaviour + + Does just what you would expect - adds to stats as with DSF_CORPUS, + but uses a signature (array of hashed tokens) instead of actual message text. + + [20030703.0000] move tokenizing to _ds_tokenize() function + + Moved tokenizing code to token.c. Unit test for _ds_tokenize(). + Fix bug with implicit function signature for _ds_getcrc64(). + + [20030703.0000] bug with DSF_CLASSIFY on empty dictionary + + Unit test for DSF_CLASSIFY checks that totals are unchanged both + in memory and on disk. Index: dspam/Makefile.am diff -c dspam/Makefile.am:1.1.1.4 dspam/Makefile.am:1.5 *** dspam/Makefile.am:1.1.1.4 Sat Sep 6 17:40:09 2003 --- dspam/Makefile.am Sat Sep 6 17:56:31 2003 *************** *** 14,20 **** # libdspam.a contans objects common for dspam and tools/* binaries libdspam_la_SOURCES = \ config.h libdspam_objects.h \ ! libdspam.c libdspam.h \ tbt.c tbt.h \ lht.c lht.h \ lock.c lock.h \ --- 14,20 ---- # libdspam.a contans objects common for dspam and tools/* binaries libdspam_la_SOURCES = \ config.h libdspam_objects.h \ ! libdspam.c token.c libdspam.h \ tbt.c tbt.h \ lht.c lht.h \ lock.c lock.h \ Index: dspam/Makefile.in diff -c dspam/Makefile.in:1.1.1.5 dspam/Makefile.in:1.6 *** dspam/Makefile.in:1.1.1.5 Sat Sep 6 17:40:09 2003 --- dspam/Makefile.in Sat Sep 6 17:56:31 2003 *************** *** 140,146 **** # libdspam.a contans objects common for dspam and tools/* binaries libdspam_la_SOURCES = \ config.h libdspam_objects.h \ ! libdspam.c libdspam.h \ tbt.c tbt.h \ lht.c lht.h \ lock.c lock.h \ --- 140,146 ---- # libdspam.a contans objects common for dspam and tools/* binaries libdspam_la_SOURCES = \ config.h libdspam_objects.h \ ! 
libdspam.c token.c libdspam.h \ tbt.c tbt.h \ lht.c lht.h \ lock.c lock.h \ *************** *** 175,181 **** libdspam_la_LIBADD = am_libdspam_la_OBJECTS = libdspam.lo tbt.lo lht.lo lock.lo base64.lo \ ! buffer.lo localdb.lo util.lo nodetree.lo error.lo decode.lo libdspam_la_OBJECTS = $(am_libdspam_la_OBJECTS) bin_PROGRAMS = dspam$(EXEEXT) PROGRAMS = $(bin_PROGRAMS) --- 175,181 ---- libdspam_la_LIBADD = am_libdspam_la_OBJECTS = libdspam.lo tbt.lo lht.lo lock.lo base64.lo \ ! buffer.lo localdb.lo util.lo nodetree.lo error.lo decode.lo token.lo libdspam_la_OBJECTS = $(am_libdspam_la_OBJECTS) bin_PROGRAMS = dspam$(EXEEXT) PROGRAMS = $(bin_PROGRAMS) Index: dspam/addspam.sh diff -c /dev/null dspam/addspam.sh:1.2.2.1 *** /dev/null Fri Jan 30 14:25:10 2004 --- dspam/addspam.sh Fri Jan 30 13:30:29 2004 *************** *** 0 **** --- 1,26 ---- + #!/bin/sh + + die() { + echo `date '+%b%d %H:%M:%S'` "$*" >&2 + exit 1 + } + + log() { + echo `date '+%b%d %H:%M:%S'` "$*" >&2 + } + + action="--`basename $0 .sh`" + log dspam -d $user $action + + exec >>/var/log/dspam.log 2>&1 + + read from || die "No input" + set - $from + envfrom="$2" + IFS="@" + set - $envfrom + user="$1" + domain="$2" + [ "$domain" = "yourcompany.com" ] || die "Invalid source domain: $domain" + log dspam -d $user $action + /usr/local/bin/dspam -d $user $action || die "DSPAM error" Index: dspam/decode.c diff -c dspam/decode.c:1.1.1.2 dspam/decode.c:1.2 *** dspam/decode.c:1.1.1.2 Sat Sep 6 17:40:09 2003 --- dspam/decode.c Wed Sep 10 21:52:43 2003 *************** *** 200,205 **** --- 200,206 ---- } free(m_in); + nt_destroy(boundaries); return out; } Index: dspam/dspam.c diff -c dspam/dspam.c:1.1.1.10 dspam/dspam.c:1.13 *** dspam/dspam.c:1.1.1.10 Sat Sep 6 17:40:09 2003 --- dspam/dspam.c Sat Sep 6 17:56:31 2003 *************** *** 18,23 **** --- 18,57 ---- */ + /* + * $Log: dspam.c,v $ + * Revision 1.13 2003/09/06 21:56:31 stuart + * Merge dspam-2.6.5.2 release + * + * Revision 1.12 2003/09/02 21:03:32 stuart + * 
Merge changes for 2.6.5 release. + * + * Revision 1.11 2003/09/01 17:40:45 stuart + * Using memory after free when allocation fails delivering message. + * + * Revision 1.10 2003/08/09 02:48:28 stuart + * Merge changes from 2.6.4.b2 + * + * Revision 1.9 2003/08/01 16:27:35 stuart + * Merge changes from 2.6.3 + * + * Revision 1.8 2003/07/29 21:21:06 stuart + * Merge with dspam-2.6.2.02 + * + * Revision 1.7 2003/07/02 12:57:12 stuart + * Beta release. + * + * Revision 1.6 2003/07/01 20:00:06 stuart + * Merge changes from 2.6.2-b1 + * + * Revision 1.5 2003/07/01 19:02:19 stuart + * Free copyback buffer. + * + * Revision 1.4 2003/07/01 01:54:58 stuart + * Per email from Jonathan, don't mess with IGNOREHEADER. + * + */ + #ifdef HAVE_CONFIG_H #include #endif *************** *** 358,364 **** } #ifndef SIGNATURE_ATTACHMENTS ! if ((result = h_sig->open(h_sig, NULL, signature, NULL, DB_BTREE, DB_CREATE, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP))!=0) { file_error(ERROR_FILE_WRITE, signature, db_strerror(result)); return -4; } --- 392,398 ---- } #ifndef SIGNATURE_ATTACHMENTS ! if ((result = h_sig->open(h_sig, TRANID_PLACEHOLDER signature, NULL, DB_BTREE, DB_CREATE, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP))!=0) { file_error(ERROR_FILE_WRITE, signature, db_strerror(result)); return -4; } *************** *** 578,588 **** } if (!have_signature && mode != DSM_PROCESS) { ! CTX->flags = CTX->flags ^ DSF_SIGNATURE; CTX->signature = NULL; } ! if (have_signature && mode != DSM_PROCESS) result = dspam_process(CTX, NULL); else { CTX->signature = NULL; --- 612,622 ---- } if (!have_signature && mode != DSM_PROCESS) { ! CTX->flags &= ~DSF_SIGNATURE; CTX->signature = NULL; } ! 
if (have_signature && mode != DSM_PROCESS) result = dspam_process(CTX, NULL); else { CTX->signature = NULL; Index: dspam/dspam.spec diff -c /dev/null dspam/dspam.spec:1.49.2.2 *** /dev/null Fri Jan 30 14:25:10 2004 --- dspam/dspam.spec Fri Jan 30 14:24:52 2004 *************** *** 0 **** --- 1,286 ---- + %ifos Linux + %define sendmailcf /usr/share/sendmail-cf + %define htmldir /var/www/html + %define cgibin /var/www/cgi-bin + %else + %define sendmailcf /usr/lib/sendmail-cf + %define htmldir /Public + %define cgibin /usr/local/www/cgi-bin + %endif + + Summary: A library and Mail Delivery Agent for Bayesian spam filtering + Name: dspam + Version: 2.6.5.2 + Release: 5 + Copyright: GPL + URL: http://www.networkdweebs.com/software/dspam/ + Group: System Environment/Daemons + Source: http://bmsi.com/linux/dspam-%{version}.tar.gz + Source1: dspam.m4 + Patch: dspam-2.6.5-db3.patch + Patch1: dspam-2.6.5.2.patch + Buildroot: /var/tmp/dspam-root + %ifos Linux + BuildRequires: db3-devel patch + Requires: /usr/sbin/useradd + %else + %ifos aix4.1 + BuildRequires: db3-devel patch + %else + BuildRequires: db4-devel patch + %endif + %endif + + %package devel + Summary: Developers library for custom access to dspam + Group: Development/Libraries + + %description + DSPAM (as in De-Spam) is an open-source project to create a new kind of + anti-spam mechanism, and is currently effective as both a server-side agent + for UNIX email servers and a developer's library for mail clients, other + anti-spam tools, and similar projects requiring drop-in spam filtering. + + The DSPAM agent masquerades as the email server's local delivery agent and + filters/learns spams using an advanced Bayesian statistical approach (based on + Bayes' theorem of combined probabilities) which provides an administratively + maintenance-free, easy-learning Anti-Spam service custom tailored to each + individual user's behavior. 
Advanced because on top of standard Bayesian + filtering is also incorporated the use of Chained Tokens, de-obfuscation, and + other enhancements. DSPAM works great with Sendmail and Exim, and should work + well with any other MTA that supports an external local delivery agent + (postfix, qmail, etc.) + + %description devel + DSPAM has had its core engine moved into a separate library, libdspam. + This library can be used by developers to provide 'drop-in' spam filtering for + their mail client applications, other anti-spam tools, or similar projects. + + %prep + %setup -q + %patch -p1 + %patch1 -p1 + + %build + %ifos aix4.1 + export CC="gcc -mthreads" + LDFLAGS="-Wl,-blibpath:/lib:/usr/local/lib" + %else + LDFLAGS=-s + %endif + CFLAGS="$RPM_OPT_FLAGS" + export CFLAGS LDFLAGS + ./configure --with-userdir=/var/lib/dspam \ + --with-userdir-owner=none \ + --with-userdir-group=none \ + --with-dspam-owner=none \ + --with-dspam-group=none \ + %ifos aix4.1 + --with-local-delivery-agent=/bin/bellmail \ + %endif + --disable-dependency-tracking + make + mv dspam dspam.optout + rm dspam.o + make dspam CPPFLAGS=-DOPT_IN + ln dspam dspam.optin + + %install + rm -rf $RPM_BUILD_ROOT + make install DESTDIR=$RPM_BUILD_ROOT + + # include both optin and optout version of dspam + cp dspam.optout $RPM_BUILD_ROOT/usr/local/bin + cd $RPM_BUILD_ROOT/usr/local/bin + mv dspam dspam.optin + ln -s dspam.optout dspam + cd - + + # allow others to query stats + chmod g+s $RPM_BUILD_ROOT/usr/local/bin/dspam_stats + + # manually copy include files needed for devel package + INCDIR="$RPM_BUILD_ROOT/usr/local/include" + mkdir -p $INCDIR + cp -p libdspam.h libdspam_objects.h lht.h nodetree.h decode.h buffer.h $INCDIR + + # provide maintenance scripts + ETCDIR="$RPM_BUILD_ROOT/etc" + mkdir -p $ETCDIR/cron.daily + mkdir -p $ETCDIR/cron.weekly + cat >$ETCDIR/cron.daily/dspam <<'EOF' + #!/bin/sh + /usr/local/bin/dspam_clean + EOF + chmod a+x $ETCDIR/cron.daily/dspam + cat >$ETCDIR/cron.weekly/dspam 
<<'EOF' + #!/bin/sh + /usr/local/bin/dspam_purge + EOF + chmod a+x $ETCDIR/cron.weekly/dspam + + # install script for optional smart spam alias + cp -p addspam.sh $RPM_BUILD_ROOT/usr/local/bin/addspam + mkdir -p $RPM_BUILD_ROOT/var/log + touch $RPM_BUILD_ROOT/var/log/dspam.log + + # install script for optional smart fp alias + cp -p addspam.sh $RPM_BUILD_ROOT/usr/local/bin/falsepositive + chmod a+x $RPM_BUILD_ROOT/usr/local/bin/falsepositive + + # allow dspam in /etc/smrsh + mkdir -p $ETCDIR/smrsh + ln -sf /usr/local/bin/dspam $ETCDIR/smrsh + ln -sf /usr/local/bin/addspam $ETCDIR/smrsh + + # install sendmail mailer + mkdir -p $RPM_BUILD_ROOT%{sendmailcf}/mailer + cp -p %{SOURCE1} $RPM_BUILD_ROOT%{sendmailcf}/mailer + + # install CGI script + CGIDIR="$RPM_BUILD_ROOT%{cgibin}" + HTMLDIR="$RPM_BUILD_ROOT%{htmldir}" + mkdir -p $HTMLDIR/dspam + mkdir -p $CGIDIR + mkdir -p $RPM_BUILD_ROOT/etc/mail + ln -sf /var/lib/dspam $RPM_BUILD_ROOT/etc/mail/dspam + cp -p cgi/* $HTMLDIR/dspam + %ifos aix4.1 + # No suexec on our AIX installs + cat >$CGIDIR/dspam.cgi <<'EOF' + #!/bin/sh + cd %{htmldir}/dspam + exec /usr/local/bin/perl dspam.cgi + EOF + %else + # Use suexec to run CGI + cat >$CGIDIR/dspam.cgi <<'EOF' + #!/bin/sh + cd %{htmldir}/dspam + exec /usr/sbin/suexec dspam dspam dspam.cgi + EOF + %endif + chmod 0755 $HTMLDIR/dspam $HTMLDIR/dspam/dspam.cgi + + %clean + rm -rf $RPM_BUILD_ROOT + + %ifos linux + %pre + /usr/sbin/useradd -G mail -d /var/lib/dspam -c "Dspam agent" -s /dev/null \ + dspam >/dev/null 2>&1 || : + + %post + if grep '^/usr/local/lib$' /etc/ld.so.conf >/dev/null; then + : + else + echo "/usr/local/lib" >>/etc/ld.so.conf + fi + /sbin/ldconfig + %endif + %ifos aix4.1 + %pre + mkuser -a pgrp=mail home=/var/lib/dspam \ + gecos="DSpam mail filter" dspam 2>/dev/null || : + %endif + + %files + %defattr(-,root,root) + %doc README CHANGE CHANGE.sdg dspam-button.gif + %ifnos aix4.1 + /usr/local/lib/libdspam.so.3.0.0 + /usr/local/lib/libdspam.so.3 + %endif + 
%attr(02511,root,mail)/usr/local/bin/dspam.optin + %attr(02511,root,mail)/usr/local/bin/dspam.optout + %attr(-,root,mail)/usr/local/bin/dspam + %attr(-,root,mail)/usr/local/bin/dspam_clean + %attr(-,root,mail)/usr/local/bin/dspam_convert + %attr(-,root,mail)/usr/local/bin/dspam_crc + %attr(-,root,mail)/usr/local/bin/dspam_dump + %attr(-,root,mail)/usr/local/bin/dspam_purge + %attr(-,root,mail)/usr/local/bin/dspam_stats + /usr/local/bin/dspam_corpus + /usr/local/bin/dspam_genaliases + %attr(0775,root,mail) /var/lib/dspam + /etc/cron.daily/dspam + /etc/cron.weekly/dspam + /etc/smrsh/dspam + /etc/smrsh/addspam + %{sendmailcf}/mailer/* + %attr(-,dspam,dspam)%{htmldir}/dspam + %attr(0755,root,root)%{cgibin}/dspam.cgi + /etc/mail/dspam + %config %attr(0755,root,mail)/usr/local/bin/addspam + %config %attr(0755,root,mail)/usr/local/bin/falsepositive + %attr(0664,root,mail)/var/log/dspam.log + + %files devel + %defattr(-,root,root) + %ifnos aix4.1 + /usr/local/lib/libdspam.so + %endif + /usr/local/lib/libdspam.la + /usr/local/lib/libdspam.a + /usr/local/include/* + + %changelog + * Fri Jan 30 2004 Stuart Gathman 2.6.5.2-5 + - Move dspam-python to pydspam package + - improve smart alias + - add missing headers for dspam-devel + - fix dictionary lock + * Tue Oct 21 2003 Stuart Gathman 2.6.5.2-4 + - pydspam-1.1.4 + - run pydspam_process on the hour + - Count signature spam corpus as miss + - Remove "Delete All" from CGI and default messages to checked. + * Wed Sep 10 2003 Stuart Gathman + - Fix memory leaks + - Increase lock timeout + - Make dspam sgid and a+x so that generic addspam works + - Install optin and optout versions. + * Sat Sep 06 2003 Stuart Gathman + - Merge dspam-2.6.5.2 + - Move cgi to /var/www/html/dspam. logo and css weren't getting + - found under cgi-bin. 
+ * Fri Sep 05 2003 Stuart Gathman + - Modify tbt.c to use parent pointer and eliminate recursion which + - was overflowing thread stack on AIX + * Tue Sep 02 2003 Stuart Gathman + - Merge changes for release 2.6.5 + - use pydspam 1.1.1 + * Wed Aug 27 2003 Stuart Gathman + - Tweak for AIX + * Mon Aug 18 2003 Stuart Gathman + - Merge changes for 2.6.4.01 + - empty input patch + - Include smart spam alias + * Thu Aug 14 2003 Stuart Gathman + - Merge changes for 2.6.4 + * Mon Aug 04 2003 Stuart Gathman + - Install CGI script to run as dspam user + * Thu Jul 31 2003 Stuart Gathman + - Make building python package optional + - OK, OK, so maybe it should be a separate RPM + * Wed Jul 30 2003 Stuart Gathman + - Fix dspam_stats bug for release 2 + * Wed Jul 30 2003 Stuart Gathman + - Move python source to pydspam project + - merge dspam-2.6.2.02 from networkdweebs + * Fri Jul 11 2003 Stuart Gathman + - Move python support to sub package + - fix CORPUS bug + * Thu Jul 10 2003 Stuart Gathman + - Bug fixes, python support. 
+ * Thu Jul 03 2003 Stuart Gathman + - Merge with 2.6.2 stable + * Wed Jul 02 2003 Stuart Gathman + - Fix bugs in DSF_CLASSIFY + * Mon Jun 30 2003 Stuart Gathman + - Fix bugs in dspam.c and libdspam.c + * Thu Jun 26 2003 Stuart Gathman + - Add dspam to /etc/smrsh + - Add dspam mailer to sendmail-cf + * Wed Jun 25 2003 Stuart Gathman + - Linux RPM Index: dspam/lht.c diff -c dspam/lht.c:1.1.1.2 dspam/lht.c:1.2 *** dspam/lht.c:1.1.1.2 Tue Sep 2 16:30:30 2003 --- dspam/lht.c Wed Sep 10 21:52:43 2003 *************** *** 77,89 **** node = c_lht_first(lht, &c); while(node != NULL) { next = c_lht_next(lht, &c); lht_delete(lht, node->key); node = next; } free(lht->tbl); free(lht); - lht = (struct lht *) NULL; return 0; } --- 77,89 ---- node = c_lht_first(lht, &c); while(node != NULL) { next = c_lht_next(lht, &c); + free(node->token_name); lht_delete(lht, node->key); node = next; } free(lht->tbl); free(lht); return 0; } Index: dspam/libdspam.c diff -c dspam/libdspam.c:1.1.1.12 dspam/libdspam.c:1.25 *** dspam/libdspam.c:1.1.1.12 Sat Sep 6 17:40:09 2003 --- dspam/libdspam.c Tue Oct 21 20:41:24 2003 *************** *** 18,23 **** --- 18,99 ---- */ + /* + * $Log: libdspam.c,v $ + * Revision 1.25 2003/10/22 00:41:24 stuart + * Count signature spam corpus as miss. + * + * Revision 1.24 2003/09/11 01:52:43 stuart + * Fix memory leaks. + * + * Revision 1.23 2003/09/06 23:52:51 stuart + * Can't destroy message because dspam.c might have passed it in. + * + * Revision 1.22 2003/09/06 23:15:13 stuart + * Put tbt_create back the way it was. + * + * Revision 1.21 2003/09/06 21:56:31 stuart + * Merge dspam-2.6.5.2 release + * + * Revision 1.20 2003/09/06 07:21:09 stuart + * Don't create tbt *index until needed. + * + * Revision 1.19 2003/09/06 01:38:28 stuart + * Modify tbt.c to eliminate recursion and intermediate nodetree. + * + * Revision 1.18 2003/09/03 05:41:00 stuart + * dspam.c needs to send decoded message into dspam_process, so + * caller needs to destroy ctx->message. 
+ * + * Revision 1.17 2003/09/03 04:09:28 stuart + * CTX->message never destroyed + * + * Revision 1.16 2003/09/02 21:03:32 stuart + * Merge changes for 2.6.5 release. + * + * Revision 1.15 2003/08/18 21:00:26 stuart + * Merge changes from 2.6.4.01 + * + * Revision 1.14 2003/08/14 17:52:43 stuart + * Merged vendor changes for 2.6.4 + * + * Revision 1.13 2003/08/09 02:48:28 stuart + * Merge changes from 2.6.4.b2 + * + * Revision 1.12 2003/08/07 14:40:50 stuart + * Test case for quoted printable decoding. + * + * Revision 1.11 2003/08/01 16:27:35 stuart + * Merge changes from 2.6.3 + * + * Revision 1.10 2003/07/29 21:21:06 stuart + * Merge with dspam-2.6.2.02 + * + * Revision 1.9 2003/07/11 18:59:08 stuart + * *** empty log message *** + * + * Revision 1.8 2003/07/09 14:08:49 stuart + * Simplify stat update logic, unit tests for stat update + * + * Revision 1.7 2003/07/03 18:55:51 stuart + * Move tokenize to separate source file. + * + * Revision 1.6 2003/07/03 14:12:54 stuart + * Don't undo stats with SIGNATURE + CORPUS + * + * Revision 1.5 2003/07/03 14:03:39 stuart + * Add SIGNATURE + CORPUS behaviour + * Test updating after CLASSIFY with SIGNATURE + CORPUS + * Test CORPUS + * + * Revision 1.4 2003/07/01 20:00:06 stuart + * Merge changes from 2.6.2-b1 + * + * Revision 1.3 2003/07/01 01:37:46 stuart + * Typo checking out of memory. 
+ * + */ + #ifdef HAVE_CONFIG_H #include #endif *************** *** 99,105 **** _ds_close_db(CTX); if (CTX->signature != NULL && CTX->mode == DSM_PROCESS) free(CTX->signature); - free(CTX); return 0; } --- 175,180 ---- *************** *** 129,134 **** --- 204,215 ---- char heading[1024]; int i = 0; + + CTX->_process_start = time(NULL); + + if (CTX->mode != DSM_PROCESS && (CTX->flags & DSF_SIGNATURE)) + return _ds_process_signature(CTX); + header = buffer_create(NULL); body = buffer_create(NULL); *************** *** 139,149 **** return -1; } - CTX->_process_start = time(NULL); - - if (CTX->mode != DSM_PROCESS && (CTX->flags & DSF_SIGNATURE)) - return _ds_process_signature(CTX); - if (CTX->message == NULL && message != NULL) CTX->message = _ds_actualize_message(message); --- 220,225 ---- *************** *** 319,325 **** } */ ! if ((CTX->result = CTX->db->open(CTX->db, NULL, CTX->dictionary, NULL, DB_BTREE, DB_CREATE, 0))!=0) { LOG(LOG_WARNING, "db->open failed: %s", db_strerror(CTX->result)); buffer_destroy(header); buffer_destroy(body); --- 395,401 ---- } */ ! 
if ((CTX->result = CTX->db->open(CTX->db, TRANID_PLACEHOLDER CTX->dictionary, NULL, DB_BTREE, DB_CREATE, 0))!=0) { LOG(LOG_WARNING, "db->open failed: %s", db_strerror(CTX->result)); buffer_destroy(header); buffer_destroy(body); *************** *** 354,359 **** --- 430,476 ---- return -2; } + /* calculate spam score from maxitems most interesting items */ + static float + _ds_bayescalc(struct lht *freq,struct tbt *index,int maxitems) { + /* Bayesian Calculations */ + double bay_top = 0.0; /* AB */ + double bay_bot = 0.0; /* (1-A)(1-B) */ + struct tbt_node *node; + int i; + + node = tbt_first(index); + for (i = 0; i < index->items && i < maxitems; i++) { + unsigned long long crc; + char *token_name; + struct _ds_spam_stat stat; + + crc = node->token; + token_name = lht_gettoken(freq, crc); + + if (lht_getspamstat(freq, crc, &stat) || token_name == NULL) { + node = tbt_next(node); + continue; + } + + LOGDEBUG("combining: [%2.6f] %s %ld %ld", + stat.probability, token_name, stat.spam_hits, stat.innocent_hits); + + if (bay_top == 0.0) + bay_top = stat.probability; + else + bay_top *= stat.probability; + + if (bay_bot == 0.0) + bay_bot = 1-stat.probability; + else + bay_bot *= (1-stat.probability); + + node = tbt_next(node); + } + return (float)(bay_top / (bay_top + bay_bot)); + } + /* _ds_operate: operate on the message - calculate the statistical probability the email is spam - update tokens in dictionary according to result/mode *************** *** 369,614 **** int _ds_operate(DSPAM_CTX *CTX, char *headers, char *body) { - char *token; /* current token */ - char joined_token[32]; /* used for de-obfuscating tokens */ - char *previous_token = NULL; /* used for chained tokens */ - - char *line = NULL; /* header broken up into lines */ - char *url_body; /* urls broken up */ - - char heading[128]; /* current heading */ - int alloc_joined=0; /* track joined token free()'s */ - int i; - - /* Bayesian Calculations */ - float bay_top = 0.0; /* AB */ - float bay_bot = 0.0; /* 
(1-A)(1-B) */ - /* Long Hashed Token Tree: Track tokens, frequencies, and stats */ ! struct lht *freq = lht_create(1543); struct lht_node *node_lht; struct lht_c c_lht; struct _ds_spam_stat stat; struct tbt *index = tbt_create(); /* Binary tree index */ ! struct nt *sort; /* Sort array */ ! struct nt *header; /* header array */ ! struct nt_node *node_nt; ! struct nt_c c_nt; ! ! joined_token[0] = 0; ! header = nt_create(NT_CHAR); ! if (freq == NULL || header == NULL || index == NULL) { ! tbt_destroy(index); ! nt_destroy(header); lht_destroy(freq); LOG(LOG_CRIT, "memory allocation failed"); return -1; } - CTX->result = (CTX->mode == DSM_ADDSPAM) ? 1 : 0; - - /* HEADER: Split up the text into tokens, include heading */ - line = strtok(headers, "\n"); - while(line != NULL) { - nt_add(header, line); - line = strtok(NULL, "\n"); - } - - node_nt = c_nt_first(header, &c_nt); - heading[0] = 0; - while(node_nt != NULL) { - int is_received; - joined_token[0] = 0; - alloc_joined = 0; - - line = node_nt->ptr; - token = strtok(line, ":"); - if (token != NULL && token[0] != 32 && token[0] != 9 && !strstr(token, " ")) { - strlcpy(heading, token, 128); - previous_token = NULL; - } - - #ifdef VERBOSE - LOGDEBUG("Reading '%s' header", heading); - #endif - - is_received = (!strcmp(heading, "Received") ? 1 : 0); - - if (is_received) - token = strtok(NULL, DELIMITERS_HEADING); - else - token = strtok(NULL, DELIMITERS); - - while(token != NULL) { - int l; - - l = strlen(token); - if ((l>2 && l<25) || (l == 2 && !strchr(token, '$') && !strchr(token, '!'))) { - - #ifdef VERBOSE - LOGDEBUG("Processing '%s' token in '%s' header", token, heading); - #endif - - /* If we had to join a token together (e.g. S E X), process it */ - if (joined_token[0] != 0) { - if (strlen(joined_token)<25 && joined_token[1]!=0) { - if (! 
_ds_process_header_token(CTX, joined_token, previous_token, freq, heading) && (CTX->flags & DSF_CHAINED)) { - alloc_joined = 1; - previous_token = strdup(joined_token); - } - } - joined_token[0] = 0; - } - - if (! _ds_process_header_token(CTX, token, previous_token, freq, heading) && (CTX->flags & DSF_CHAINED)) { - if (alloc_joined) { - free(previous_token); - alloc_joined = 0; - } - previous_token = token; - } - - } else if (l==1 || (l==2 && (strchr(token, '$') || strchr(token, '!'))) ) { - strlcat(joined_token, token, sizeof(joined_token)); - } - - if (is_received) - token = strtok(NULL, DELIMITERS_HEADING); - else - token = strtok(NULL, DELIMITERS); - } - node_nt = c_nt_next(header, &c_nt); - - if (joined_token[0] != 0) { - if (strlen(joined_token)<25 && joined_token[1]!=0) { - _ds_process_header_token(CTX, joined_token, previous_token, freq, heading); - } - } - - } - - nt_destroy(header); - - if (alloc_joined) - free(previous_token); - - previous_token = NULL; - - /* BODY: Split up URLs into tokens, count frequency */ - url_body = strdup(body); - if (url_body != NULL) { - char combined_token[256]; - char *url_ptr = url_body; - int url_length; - unsigned long long crc; - - token = strstr(url_ptr, "http://"); - while(token != NULL) { - url_ptr = token; - - token = strtok(token, " \n\">"); - if (token != NULL) { - url_length = strlen(token); - - /* Individual tokens form the URL */ - token = strtok(token, DELIMITERS); - while(token != NULL) { - snprintf(combined_token, sizeof(combined_token), "Url*%s", token); - crc = _ds_getcrc64(combined_token); - lht_hit(freq, crc, combined_token); - token = strtok(NULL, DELIMITERS); - } - - memset(body+(url_ptr-url_body), 32, url_length); - url_ptr += url_length + 1; - token = strstr(url_ptr, "http://"); - } else - token = NULL; - } - free(url_body); - } - - url_body = strdup(body); - if (url_body != NULL) { - char combined_token[256]; - char *url_ptr = url_body; - int url_length; - unsigned long long crc; - - url_ptr = 
url_body; - token = strstr(url_ptr, "href=\""); - while(token != NULL) { - url_ptr = token+6; - - token = strtok(url_ptr, " \n\">"); - if (token != NULL) { - url_length = strlen(token); - - /* Individual tokens form the URL */ - token = strtok(url_ptr, DELIMITERS); - while(token != NULL) { - snprintf(combined_token, sizeof(combined_token), "Url*%s", token); - crc = _ds_getcrc64(combined_token); - lht_hit(freq, crc, combined_token); - token = strtok(NULL, DELIMITERS); - } - - memset(body+(url_ptr-url_body), 32, url_length); - - url_ptr += url_length + 1; - token = strstr(url_ptr, "href=\""); - } else - token = NULL; - } - - free(url_body); - } - - /* BODY: Split up the text into tokens, count frequency */ - joined_token[0] = 0; - alloc_joined = 0; - token = strtok(body, DELIMITERS); - while(token != NULL) { - int l = strlen(token); - if ((l>2 && l<25) || (l == 2 && !strchr(token, '$') && !strchr(token, '!'))) { - /* If we had to join a token together (e.g. S E X), process it */ - if (joined_token[0] != 0) { - if (strlen(joined_token)<25 && joined_token[1]!=0) { - if (! _ds_process_body_token(CTX, joined_token, previous_token, freq) && (CTX->flags & DSF_CHAINED)) { - alloc_joined = 1; - previous_token = strdup(joined_token); - } - } - joined_token[0] = 0; - } - - if (! _ds_process_body_token(CTX, token, previous_token, freq) && (CTX->flags & DSF_CHAINED)) { - if (alloc_joined) { - alloc_joined = 0; - free(previous_token); - } - previous_token = token; - } - } else if (l==1 || (l==2 && (strchr(token, '$') || strchr(token, '!'))) ) { - strlcat(joined_token, token, sizeof(joined_token)); - } - token = strtok(NULL, DELIMITERS); - } - - if (joined_token[0] != 0) { - if (strlen(joined_token)<25 && joined_token[1]!=0) { - _ds_process_body_token(CTX, joined_token, previous_token, freq); - } - } - /* Create a binary tree index sorted by a token's delta from .5 */ /* Also load the statistics for each token */ node_lht = c_lht_first(freq, &c_lht); ! while(node_lht != NULL) { ! 
_ds_load_stat(CTX, node_lht->key, &stat); lht_setspamstat(freq, node_lht->key, &stat); tbt_add(index, stat.probability, node_lht->key); --- 486,515 ---- int _ds_operate(DSPAM_CTX *CTX, char *headers, char *body) { /* Long Hashed Token Tree: Track tokens, frequencies, and stats */ ! struct lht *freq; struct lht_node *node_lht; struct lht_c c_lht; struct _ds_spam_stat stat; struct tbt *index = tbt_create(); /* Binary tree index */ ! struct tbt_node *node; ! int i; ! int reverse = 0; /* correcting stats if true */ ! freq = _ds_tokenize((CTX->flags & DSF_CHAINED),headers,body); ! if (freq == NULL || index == NULL) { lht_destroy(freq); + tbt_destroy(index); LOG(LOG_CRIT, "memory allocation failed"); return -1; } /* Create a binary tree index sorted by a token's delta from .5 */ /* Also load the statistics for each token */ node_lht = c_lht_first(freq, &c_lht); ! while (node_lht != NULL) { _ds_load_stat(CTX, node_lht->key, &stat); lht_setspamstat(freq, node_lht->key, &stat); tbt_add(index, stat.probability, node_lht->key); *************** *** 617,638 **** #endif node_lht = c_lht_next(freq, &c_lht); - } - - sort = tbt_sort(index); - if (sort == NULL) { - LOG(LOG_CRIT, "memory allocation failed"); - tbt_destroy(index); - lht_destroy(freq); - return -1; } ! ! /* Take the 15 most interesting tokens and generate a score, update dictionary */ ! ! if (sort->items==0) { LOGDEBUG("no tokens found in message"); tbt_destroy(index); - nt_destroy(sort); lht_destroy(freq); return -2; } --- 518,528 ---- #endif node_lht = c_lht_next(freq, &c_lht); } ! ! if (index->items==0) { LOGDEBUG("no tokens found in message"); tbt_destroy(index); lht_destroy(freq); return -2; } *************** *** 643,738 **** if (CTX->signature == NULL) { LOG(LOG_CRIT, "memory allocation error"); tbt_destroy(index); - nt_destroy(sort); lht_destroy(freq); return -1; } ! 
CTX->signature->length = sizeof(unsigned long long)*sort->items; CTX->signature->data = malloc(CTX->signature->length); if (CTX->signature->data == NULL) { LOG(LOG_CRIT, "memory allocation error"); free(CTX->signature); CTX->signature = NULL; tbt_destroy(index); - nt_destroy(sort); lht_destroy(freq); return -1; } } ! node_nt = c_nt_first(sort, &c_nt); ! ! CTX->probability = -1; ! ! for(i=0;iitems;i++) { ! unsigned long long crc; ! char *token_name; ! crc = * (unsigned long long *) node_nt->ptr; ! token_name = lht_gettoken(freq, crc); if (CTX->flags & DSF_SIGNATURE) { memcpy((char *)CTX->signature->data + (i*sizeof(unsigned long long)), &crc, sizeof(unsigned long long)); } if (lht_getspamstat(freq, crc, &stat) || token_name == NULL) { ! node_nt = c_nt_next(sort, &c_nt); continue; } ! if (i<15) { ! LOGDEBUG("combining: [%2.6f] %s %ld %ld", stat.probability, token_name, stat.spam_hits, stat.innocent_hits); ! ! if (bay_top == 0.0) ! bay_top = stat.probability; ! else ! bay_top *= stat.probability; ! if (bay_bot == 0.0) ! bay_bot = 1-stat.probability; ! else ! bay_bot *= (1-stat.probability); } ! ! if (i == 15 && CTX->probability < 0) ! CTX->probability = (bay_top) / (bay_top + bay_bot); ! ! if (i>=15) { ! if (CTX->probability>=.9 && CTX->mode != DSM_FALSEPOSITIVE) ! CTX->result = DSR_ISSPAM; ! ! if (CTX->result == DSR_ISSPAM) ! stat.spam_hits++; ! else if (CTX->result != DSM_FALSEPOSITIVE) ! stat.innocent_hits++; ! ! if (CTX->mode == DSM_ADDSPAM && (! (CTX->flags & DSF_CORPUS))) ! stat.innocent_hits--; ! ! if (CTX->mode == DSM_FALSEPOSITIVE) { ! stat.innocent_hits++; ! stat.spam_hits--; ! if (stat.spam_hits<0) ! stat.spam_hits = 0; ! } ! ! if (stat.innocent_hits<0) ! stat.innocent_hits = 0; ! ! _ds_save_stat(CTX, crc, &stat); ! } - - node_nt = c_nt_next(sort, &c_nt); - } ! CTX->probability = (bay_top) / (bay_top + bay_bot); ! if (CTX->probability>=.9 && CTX->mode != DSM_FALSEPOSITIVE) { ! 
CTX->result = DSR_ISSPAM; } if (CTX->result == DSR_ISSPAM) { CTX->totals.total_spam++; --- 533,612 ---- if (CTX->signature == NULL) { LOG(LOG_CRIT, "memory allocation error"); tbt_destroy(index); lht_destroy(freq); return -1; } ! CTX->signature->length = sizeof(unsigned long long)*index->items; CTX->signature->data = malloc(CTX->signature->length); if (CTX->signature->data == NULL) { LOG(LOG_CRIT, "memory allocation error"); free(CTX->signature); CTX->signature = NULL; tbt_destroy(index); lht_destroy(freq); return -1; } } ! if (CTX->mode == DSM_FALSEPOSITIVE) { ! CTX->probability = 0.0; ! CTX->result = DSR_ISINNOCENT; ! } ! else if (CTX->mode == DSM_ADDSPAM) { ! CTX->probability = 1.0; ! CTX->result = DSR_ISSPAM; ! } ! else { ! if (CTX->flags & DSF_CORPUS) { ! CTX->probability = 0.0; ! CTX->result = DSR_ISINNOCENT; ! } ! else { ! /* calculate spam score from 15 most interesting items */ ! CTX->probability = _ds_bayescalc(freq,index,15); ! if (CTX->probability>=.9) ! CTX->result = DSR_ISSPAM; ! else ! CTX->result = DSR_ISINNOCENT; ! } ! } ! /* update stats in dict, and copy signature */ ! node = tbt_first(index); ! for (i = 0 ;i < index->items; i++) { ! unsigned long long crc = node->token; ! char *token_name = lht_gettoken(freq, crc); if (CTX->flags & DSF_SIGNATURE) { memcpy((char *)CTX->signature->data + (i*sizeof(unsigned long long)), &crc, sizeof(unsigned long long)); } if (lht_getspamstat(freq, crc, &stat) || token_name == NULL) { ! node = tbt_next(node); continue; } ! if (CTX->result == DSR_ISSPAM) ! stat.spam_hits++; ! else ! stat.innocent_hits++; ! if (CTX->mode == DSM_ADDSPAM && (! (CTX->flags & DSF_CORPUS))) { ! if (--stat.innocent_hits < 0) ! stat.innocent_hits = 0; } ! if (CTX->mode == DSM_FALSEPOSITIVE) { ! if (--stat.spam_hits < 0) ! stat.spam_hits = 0; } ! _ds_save_stat(CTX, crc, &stat); ! 
node = tbt_next(node); } + /* update totals */ if (CTX->result == DSR_ISSPAM) { CTX->totals.total_spam++; *************** *** 745,816 **** if (CTX->totals.total_innocent<0) CTX->totals.total_innocent = 0; } - } else { - if (CTX->mode == DSM_FALSEPOSITIVE) { - CTX->totals.total_innocent++; - CTX->totals.false_positives++; - CTX->totals.total_spam--; - if (CTX->totals.total_spam<0) - CTX->totals.total_spam=0; - } else - CTX->totals.total_innocent++; } ! ! _ds_set_spamtotals(CTX); ! ! node_nt = c_nt_first(sort, &c_nt); ! ! for(i=0;i<(sort->items>15) ? 15 : sort->items && node_nt != NULL;i++) { ! unsigned long long crc; ! char *token_name; ! ! crc = *(unsigned long long *)node_nt->ptr; ! token_name = lht_gettoken(freq, crc); ! if (lht_getspamstat(freq, crc, &stat) || token_name == NULL) { ! node_nt = c_nt_next(sort, &c_nt); ! continue; ! } ! ! if (CTX->result == DSR_ISSPAM) ! stat.spam_hits++; ! else if (CTX->result != DSM_FALSEPOSITIVE) ! stat.innocent_hits++; ! ! if (CTX->mode == DSM_ADDSPAM && (! (CTX->flags & DSF_CORPUS))) ! stat.innocent_hits--; ! if (CTX->mode == DSM_FALSEPOSITIVE) { ! stat.innocent_hits++; ! stat.spam_hits--; ! if (stat.spam_hits<0) ! stat.spam_hits = 0; } - if (stat.innocent_hits<0) - stat.innocent_hits = 0; - - _ds_save_stat(CTX, crc, &stat); - node_nt = c_nt_next(sort, &c_nt); } tbt_destroy(index); lht_destroy(freq); - nt_destroy(sort); ! if (CTX->mode == DSM_FALSEPOSITIVE) { ! CTX->probability = 0.0; ! CTX->result = DSR_ISINNOCENT; ! } else if (CTX->mode == DSM_ADDSPAM) { ! CTX->probability = 1.0; ! CTX->result = DSR_ISSPAM; ! } ! ! return (CTX->result == DSR_ISSPAM) ? DSR_ISSPAM : DSR_ISINNOCENT; } int _ds_process_signature(DSPAM_CTX *CTX) { unsigned long long token; struct _ds_spam_stat s; int num_tokens, i; if (CTX->signature == NULL) { LOGDEBUG("DSF_SIGNATURE specified, but no signature provided."); --- 619,646 ---- if (CTX->totals.total_innocent<0) CTX->totals.total_innocent = 0; } } ! else { ! 
CTX->totals.total_innocent++; if (CTX->mode == DSM_FALSEPOSITIVE) { ! CTX->totals.false_positives++; ! if (--CTX->totals.total_spam < 0) ! CTX->totals.total_spam = 0; } } + _ds_set_spamtotals(CTX); tbt_destroy(index); lht_destroy(freq); ! return CTX->result; } int _ds_process_signature(DSPAM_CTX *CTX) { unsigned long long token; struct _ds_spam_stat s; int num_tokens, i; + int f_corpus = (CTX->flags & DSF_CORPUS); if (CTX->signature == NULL) { LOGDEBUG("DSF_SIGNATURE specified, but no signature provided."); *************** *** 831,837 **** return -1; } ! if ((CTX->result = CTX->db->open(CTX->db, NULL, CTX->dictionary, NULL, DB_BTREE, DB_CREATE, 0))!=0) { LOG(LOG_WARNING, "db->open failed: %s", db_strerror(CTX->result)); return -1; } --- 661,667 ---- return -1; } ! if ((CTX->result = CTX->db->open(CTX->db, TRANID_PLACEHOLDER CTX->dictionary, NULL, DB_BTREE, DB_CREATE, 0))!=0) { LOG(LOG_WARNING, "db->open failed: %s", db_strerror(CTX->result)); return -1; } *************** *** 844,856 **** CTX->result = -1; if (CTX->mode == DSM_FALSEPOSITIVE) { ! CTX->totals.false_positives++; ! CTX->totals.total_spam -= (CTX->totals.total_spam>0) ? 1 : 0; CTX->totals.total_innocent++; } else { CTX->totals.spam_misses++; CTX->totals.total_spam++; - CTX->totals.total_innocent -= (CTX->totals.total_innocent>0) ? 1 : 0; } num_tokens = CTX->signature->length / sizeof(unsigned long long); --- 674,689 ---- CTX->result = -1; if (CTX->mode == DSM_FALSEPOSITIVE) { ! if (!f_corpus) { ! CTX->totals.false_positives++; ! CTX->totals.total_spam -= (CTX->totals.total_spam>0) ? 1 : 0; ! } CTX->totals.total_innocent++; } else { CTX->totals.spam_misses++; + if (!f_corpus) + CTX->totals.total_innocent -= (CTX->totals.total_innocent>0) ? 1 : 0; CTX->totals.total_spam++; } num_tokens = CTX->signature->length / sizeof(unsigned long long); *************** *** 861,869 **** if (!_ds_load_stat(CTX, token, &s)) { if (CTX->mode == DSM_FALSEPOSITIVE) { s.innocent_hits ++; ! s.spam_hits -= (s.spam_hits>0) ? 
1 : 0; } else { ! s.innocent_hits -= (s.innocent_hits>0) ? 1 : 0; s.spam_hits ++; } _ds_save_stat(CTX, token, &s); --- 694,704 ---- if (!_ds_load_stat(CTX, token, &s)) { if (CTX->mode == DSM_FALSEPOSITIVE) { s.innocent_hits ++; ! if (!f_corpus) ! s.spam_hits -= (s.spam_hits>0) ? 1 : 0; } else { ! if (!f_corpus) ! s.innocent_hits -= (s.innocent_hits>0) ? 1 : 0; s.spam_hits ++; } _ds_save_stat(CTX, token, &s); *************** *** 968,1080 **** return ret; } - int _ds_process_header_token (DSPAM_CTX *CTX, char *token, const char *previous_token, struct lht *freq, const char *heading) { - int all_num = 1, i; - int high_chars = 0; - char combined_token[256]; - int len = 0; - int is_received; - unsigned long long crc; - - is_received = (!strcmp(heading, "Received") ? 1 : 0); - - if (is_received && strlen(token)<6) - return -1; - - for(i=0;token[i]!=0;i++) { - if (! isdigit((int) token[i])) - all_num = 0; - if (token[i]>=127 || iscntrl((int) token[i])) - high_chars = 1; - } - - len = i-1; - - if (isdigit((int) token[0])) { - if ( token[len-1]!= '%') - all_num = 1; - } - - if (! 
isalnum((int) token[0]) && token[0] != '$' && token[0] != '#') - all_num = 1; - - if (is_received) - all_num = 0; - - /* Ignore tokens that are all numbers, or contain high ASCII characters */ - if (all_num || high_chars) - return -1; - - /* Ignore 'Received', 'Date', and 'Message-Id' headers */ - - if (strcasecmp(heading, "Date")) { - if (heading[0] != 0) - snprintf(combined_token, sizeof(combined_token), "%s*%s", heading, token); - else - strlcpy(combined_token, token, sizeof(combined_token)); - - crc = _ds_getcrc64(combined_token); - #ifdef VERBOSE - LOGDEBUG("Token Hit: '%s'", combined_token); - #endif - lht_hit(freq, crc, combined_token); - - if ((CTX->flags & DSF_CHAINED) && previous_token != NULL && !is_received) { - snprintf(combined_token, sizeof(combined_token), "%s*%s+%s", heading, previous_token, token); - crc = _ds_getcrc64(combined_token); - - lht_hit(freq, crc, combined_token); - } - } else { - return -1; - } - - return 0; - } - - int _ds_process_body_token(DSPAM_CTX *CTX, char *token, const char *previous_token, struct lht *freq) { - int all_num = 1, i; - int high_chars = 0; - char combined_token[256]; - int len; - unsigned long long crc; - - for(i=0;token[i]!=0;i++) { - if (! isdigit((int) token[i])) - all_num = 0; - if (token[i]>=127 || iscntrl((int) token[i])) - high_chars = 1; - } - - len = i-1; - - if (isdigit((int) token[0])) { - int l = len - 1; - if ( token[l]!= '%') - all_num = 1; - } - - if (! 
isalnum((int) token[0]) && token[0] != '$' && token[0] != '#') - all_num = 1; - - /* Ignore tokens that are all numbers, or contain high ASCII characters */ - if (all_num || high_chars) - return -1; - - crc = _ds_getcrc64(token); - - lht_hit(freq, crc, token); - - if ((CTX->flags & DSF_CHAINED) && previous_token != NULL) { - snprintf(combined_token, sizeof(combined_token), "%s+%s", previous_token, token); - crc = _ds_getcrc64(combined_token); - - lht_hit(freq, crc,combined_token); - } - - return 0; - } - int _ds_push_boundary(struct nt *stack, const char *boundary) { char *y = malloc(strlen(boundary)+3); if (y == NULL) --- 803,808 ---- *************** *** 1125,1128 **** } return 0; } - --- 853,855 ---- Index: dspam/libdspam.h diff -c dspam/libdspam.h:1.1.1.3 dspam/libdspam.h:1.3 *** dspam/libdspam.h:1.1.1.3 Thu Aug 7 14:19:54 2003 --- dspam/libdspam.h Tue Jul 29 17:21:06 2003 *************** *** 37,47 **** int _ds_open_db (DSPAM_CTX *CTX); int _ds_close_db (DSPAM_CTX *CTX); int _ds_operate (DSPAM_CTX *CTX, char *headers, char *body); ! struct lht *_ds_tokenize(DSPAM_CTX *CTX, char *headers, char *body); ! ! ! int _ds_process_header_token (DSPAM_CTX *CTX, char *joined_token, const char *previous_token, struct lht *tokens, const char *heading); ! int _ds_process_body_token (DSPAM_CTX *CTX, char *joined_token, const char *previous_token, struct lht *tokens); int _ds_process_signature(DSPAM_CTX *CTX); int _ds_push_boundary (struct nt *stack, const char *boundary); --- 37,43 ---- int _ds_open_db (DSPAM_CTX *CTX); int _ds_close_db (DSPAM_CTX *CTX); int _ds_operate (DSPAM_CTX *CTX, char *headers, char *body); ! 
struct lht *_ds_tokenize(int chained,char *headers, char *body); int _ds_process_signature(DSPAM_CTX *CTX); int _ds_push_boundary (struct nt *stack, const char *boundary); Index: dspam/libdspam_objects.h diff -c dspam/libdspam_objects.h:1.1.1.5 dspam/libdspam_objects.h:1.4 *** dspam/libdspam_objects.h:1.1.1.5 Tue Sep 2 16:30:30 2003 --- dspam/libdspam_objects.h Tue Sep 2 17:03:32 2003 *************** *** 26,31 **** --- 26,38 ---- #include #include "decode.h" + /* Adapt to use db3 when needed. */ + #if DB_VERSION_MAJOR >= 4 + #define TRANID_PLACEHOLDER NULL, + #else + #define TRANID_PLACEHOLDER + #endif + /* spam totals on disk + memory */ struct _ds_spam_totals { long total_spam; Index: dspam/localdb.c diff -c dspam/localdb.c:1.1.1.6 dspam/localdb.c:1.8 *** dspam/localdb.c:1.1.1.6 Sat Sep 6 17:40:09 2003 --- dspam/localdb.c Sat Sep 6 17:56:31 2003 *************** *** 84,90 **** int ret; if (CTX->flags & DSF_CLASSIFY) { ! _ds_get_spamtotals(CTX); /* undo changes to in memory totals */ return 0; } --- 84,91 ---- int ret; if (CTX->flags & DSF_CLASSIFY) { ! if (_ds_get_spamtotals(CTX)) /* undo changes to in memory totals */ ! memset(&CTX->totals,0,sizeof CTX->totals); return 0; } Index: dspam/lock.c diff -c dspam/lock.c:1.1.1.4 dspam/lock.c:1.2.2.1 *** dspam/lock.c:1.1.1.4 Sat Sep 6 17:40:09 2003 --- dspam/lock.c Fri Jan 30 13:30:29 2004 *************** *** 78,84 **** if (hostname != NULL && !strcmp(hostname, myhostname) && spid != NULL) { LOGDEBUG("hostname matches %s. checking pid", hostname); pid = atoi(spid); ! if (kill(pid, 0)) { retry = 0; unlink(filename); } else { --- 78,84 ---- if (hostname != NULL && !strcmp(hostname, myhostname) && spid != NULL) { LOGDEBUG("hostname matches %s. checking pid", hostname); pid = atoi(spid); ! if (kill(pid, 0) && errno == ESRCH) { retry = 0; unlink(filename); } else { *************** *** 188,194 **** if (hostname != NULL && !strcmp(hostname, myhostname) && spid != NULL) { LOGDEBUG("hostname matches %s. 
checking pid", hostname); pid = atoi(spid); ! if (kill(pid, 0)) { retry = 0; unlink(filename); } else { --- 188,194 ---- if (hostname != NULL && !strcmp(hostname, myhostname) && spid != NULL) { LOGDEBUG("hostname matches %s. checking pid", hostname); pid = atoi(spid); ! if (kill(pid, 0) && errno == ESRCH) { retry = 0; unlink(filename); } else { *************** *** 219,225 **** } if (retry) ! sleep(1); file = fopen(filename, "r"); } --- 219,225 ---- } if (retry) ! sleep(2); file = fopen(filename, "r"); } Index: dspam/maketest diff -c /dev/null dspam/maketest:1.2 *** /dev/null Fri Jan 30 14:25:10 2004 --- dspam/maketest Fri Aug 1 12:34:33 2003 *************** *** 0 **** --- 1,8 ---- + LIBDSPAM = .libs/libdspam.a + + run: testlibdspam + ./testlibdspam + + testlibdspam: testlibdspam.c $(LIBDSPAM) + gcc -g -o testlibdspam -DTEST_TOKENIZE testlibdspam.c \ + $(LIBDSPAM) -ldb -lcheck Index: dspam/tbt.c diff -c dspam/tbt.c:1.1.1.2 dspam/tbt.c:1.4 *** dspam/tbt.c:1.1.1.2 Tue Sep 2 16:30:30 2003 --- dspam/tbt.c Sat Sep 6 00:12:34 2003 *************** *** 38,43 **** --- 38,44 ---- node->token = token; node->left = NULL; node->right = NULL; + node->parent= NULL; return(node); } *************** *** 52,81 **** return(tbt); } ! int tbt_destroy(struct tbt *tbt) { ! if (tbt == NULL) ! return 0; ! ! tbt_deleteleft(tbt->root); ! return 0; } ! int tbt_deleteleft(struct tbt_node *node) { ! struct tbt_node *next; ! if (node == NULL) return 0; ! ! if (node->left != NULL) ! tbt_deleteleft(node->left); ! ! next = node->right; ! free(node); ! ! if (next != NULL) ! tbt_deleteleft(next); ! ! return 0; } int tbt_add(struct tbt *tbt, float probability, unsigned long long token) { --- 53,112 ---- return(tbt); } ! /* find largest delta in tree */ ! struct tbt_node *tbt_first(struct tbt *t) { ! struct tbt_node *p; ! if (!t) return NULL; ! p = t->root; ! if (!p) return NULL; ! while (p->left) ! p = p->left; ! return p; } ! /* find next smallest delta in tree by traversing in inorder. */ ! 
struct tbt_node *tbt_next(struct tbt_node *cur) { ! struct tbt_node *p = cur->right; ! if (p) { ! while (p->left) ! p = p->left; ! return p; ! } ! while (p = cur->parent) { ! if (p->left == cur) return p; ! cur = p; ! } ! return NULL; ! } ! /* traverse tree in postorder to delete. */ ! int tbt_destroy(struct tbt *tbt) { ! struct tbt_node *p, *q; ! int cnt; ! if (tbt == NULL) return 0; ! cnt = tbt->items; ! q = tbt_first(tbt); ! while (q) { ! /* find leftmost leaf node */ ! while (q->right) { ! q = q->right; ! while (q->left) ! q = q->left; ! } ! /* left and right have been deleted, so safe to delete. */ ! do { ! p = q; ! q = p->parent; ! #ifdef DEBUG ! memset(p,0x55,sizeof *p); /* detect algorithm failure */ ! #endif ! free(p); ! --cnt; ! } while (q && q->right == p); ! } ! free(tbt); ! return cnt; } int tbt_add(struct tbt *tbt, float probability, unsigned long long token) { *************** *** 120,125 **** --- 151,157 ---- } } + node->parent = parent; if (d == left) parent->left = node; else *************** *** 128,157 **** tbt->items++; return 0; } - - struct nt *tbt_sort(struct tbt *tbt) { - struct nt *nt; - nt = nt_create(NT_INDEX); - if (nt == NULL) - return NULL; - - tbt_addleft(tbt->root, nt); - - return nt; - } - - int tbt_addleft(struct tbt_node *node, struct nt *nt) { - - if (node == NULL) - return 0; - - if (node->left != NULL) - tbt_addleft(node->left, nt); - - nt_add(nt, (void *) &node->token); - - if (node->right != NULL) - tbt_addleft(node->right, nt); - - return 0; - } --- 160,162 ---- Index: dspam/tbt.h diff -c dspam/tbt.h:1.1.1.1 dspam/tbt.h:1.2 *** dspam/tbt.h:1.1.1.1 Thu May 22 19:25:51 2003 --- dspam/tbt.h Fri Sep 5 21:38:28 2003 *************** *** 40,45 **** --- 40,46 ---- unsigned long long token; struct tbt_node *left; struct tbt_node *right; + struct tbt_node *parent; }; /* constructor and destructor */ *************** *** 47,59 **** int tbt_destroy(struct tbt *tbt); /* read-only functions */ ! 
struct nt * tbt_sort(struct tbt *tbt); /* read-write functions */ int tbt_add(struct tbt *tbt, float probability, unsigned long long token); - int tbt_addleft(struct tbt_node *node, struct nt *nt); - int tbt_destroy(struct tbt *tbt); - int tbt_deleteleft(struct tbt_node *node); #endif /* _TBT_H */ --- 48,58 ---- int tbt_destroy(struct tbt *tbt); /* read-only functions */ ! struct tbt_node * tbt_first(struct tbt *tbt); ! struct tbt_node * tbt_next(struct tbt_node *node); /* read-write functions */ int tbt_add(struct tbt *tbt, float probability, unsigned long long token); #endif /* _TBT_H */ Index: dspam/testlibdspam.c diff -c /dev/null dspam/testlibdspam.c:1.22.2.1 *** /dev/null Fri Jan 30 14:25:10 2004 --- dspam/testlibdspam.c Sat Nov 15 16:15:35 2003 *************** *** 0 **** --- 1,552 ---- + #include + #include "libdspam.h" + #include "tbt.h" + #include + + #ifdef _AIX + #undef RAND_MAX /* AIX defines incorrect value for RAND_MAX */ + #define RAND_MAX 2147483647 + #endif + + const char *fname = "/tmp/test.dict"; + static const char msg1[] = "\ + From user@domain.com\n\ + Subject: Test message\n\ + To: testsys\n\ + \n\ + Testing 1 2 3\n\ + "; + + static const char spam1[] = "\ + From jerk@parasite.slime\n\ + Subject: RE: Info you requested\n\ + To: victim@lamb.com\n\ + \n\ + Limited time offer!\n\ + Click here to unsubscribe\n\ + "; + + static int + _dspam_process(DSPAM_CTX *ctx,const char *msg, int r, + const char *file,int line) { + int rc = dspam_process(ctx,msg); + if (ctx->message) { + _ds_destroy_message(ctx->message); + ctx->message = 0; + } + if (rc != r) { + char buf[80]; + sprintf(buf,"dspam_process returned %d, expected %d",rc,r); + _fail_unless(rc == r,file,line,buf); + } + return rc; + } + + #define dspam_process(ctx,msg) _dspam_process(ctx,msg,0,__FILE__,__LINE__) + #define dspam_process_rc(ctx,msg,rc) \ + _dspam_process(ctx,msg,rc,__FILE__,__LINE__) + + /* Check intended usage of CORPUS option. 
*/ + START_TEST(test_corpus) { + DSPAM_CTX *ctx; + unlink(fname); + ctx = dspam_init(fname,DSM_PROCESS,DSF_CHAINED|DSF_CORPUS); + dspam_process(ctx,msg1); + fail_unless(ctx->result == DSR_ISINNOCENT,"result not INNOCENT"); + fail_unless(ctx->totals.total_spam == 0,"total spam not 0"); + fail_unless(ctx->totals.total_innocent == 1,"total innocent not 1"); + dspam_destroy(ctx); + ctx = dspam_init(fname,DSM_ADDSPAM,DSF_CHAINED|DSF_CORPUS); + dspam_process(ctx,spam1); + fail_unless(ctx->result == DSR_ISSPAM,"result not SPAM"); + fail_unless(ctx->totals.total_spam == 1,"total spam not 1"); + fail_unless(ctx->totals.total_innocent == 1,"total innocent not 1"); + /* beginning with 2.6.4, DSF_ADDSPAM+DSF_CORPUS counts as a miss */ + fail_unless(ctx->totals.spam_misses == 1,"total misses not 1"); + fail_unless(ctx->totals.false_positives == 0,"total fp not 0"); + /* ramp spam stats until spam1 is recognized as such */ + { int i; + for (i = 0; i < 20; ++i) + dspam_process(ctx,spam1); + dspam_destroy(ctx); + ctx = dspam_init(fname,DSM_PROCESS,DSF_CHAINED|DSF_CORPUS); + for (i = 0; i < 20; ++i) + dspam_process(ctx,msg1); + } + dspam_destroy(ctx); + ctx = dspam_init(fname,DSM_PROCESS,DSF_CHAINED|DSF_CLASSIFY); + dspam_process(ctx,spam1); + fail_unless(ctx->result == DSR_ISSPAM,"result not SPAM"); + dspam_destroy(ctx); + ctx = dspam_init(fname,DSM_PROCESS,DSF_CHAINED|DSF_CORPUS); + dspam_process(ctx,spam1); + fail_unless(ctx->result == DSR_ISINNOCENT,"result not INNOCENT"); + dspam_destroy(ctx); + } END_TEST + + static const char nasty1[] = "\ + From jerk@parasite.slime\n\ + Subject: RE: Info you requested\n\ + To: victim@lamb.com\n\ + This-Is-A-Really-Big-Header-That-Is-Designed-To-See-Whether-The-Fixed-Size\ + -Heading-Buffer-Causes-Any-Problems-With-Overflow-And-Possibly-Executing\ + -Arbitrary-Code: You Lose Sucker\n\ + \n\ + Bwa! Ha! Ha! Ha! 
Thisisareallylongtokenthatislongerthan25chars.\n\ + Click here to unsubscribe\n\ + "; + static const char nasty2[] = "\ + From: \"Farica Anderson\" \n\ + To: victim@lamb.com\n\ + Subject: Download this!\n\ + Date: Wed, 09 Jul 2003 15:57:36 +0000\n\ + MIME-Version: 1.0\n\ + Content-Type: text/html\n\ + Content-Transfer-Encoding: 8bit\n\ + \n\ + \n\ + "; + + /** Check possible overflow situations. Mostly, dspam checks for and ignores + * extra chars on long headings and tokens, but we check to make sure the + * checking still works. */ + START_TEST(test_overflow) { + DSPAM_CTX *ctx; + unlink(fname); + ctx = dspam_init(fname,DSM_PROCESS,DSF_CHAINED|DSF_CORPUS); + dspam_process(ctx,nasty1); + dspam_destroy(ctx); + /* This little bugger crashes 2.6.2. */ + ctx = dspam_init(fname,DSM_ADDSPAM,DSF_CHAINED|DSF_IGNOREHEADER); + dspam_process_rc(ctx,nasty2,-2); /* -2 returned when no tokens found */ + dspam_destroy(ctx); + } END_TEST + + /* Check intended usage of CLASSIFY option. No updates should take + * place. Should be able to add signature result later with CORPUS option. 
*/ + START_TEST(test_classify) { + struct _ds_spam_totals tot; + struct _ds_spam_signature sig1,sig2; /* signature objects */ + DSPAM_CTX *ctx; + unlink(fname); + ctx = dspam_init(fname,DSM_PROCESS, DSF_CHAINED|DSF_SIGNATURE|DSF_CLASSIFY); + dspam_process(ctx,msg1); + fail_unless(ctx->result == DSR_ISINNOCENT,"result not INNOCENT"); + fail_unless(ctx->result > 0,"dspam result not positive"); + tot = ctx->totals; + sig1 = *ctx->signature; + dspam_destroy(ctx); + ctx = dspam_init(fname,DSM_PROCESS, DSF_CHAINED|DSF_SIGNATURE|DSF_CLASSIFY); + dspam_process(ctx,spam1); + /* check that on_disk totals didn't change with classify */ + fail_unless(ctx->totals.total_innocent == tot.total_innocent, + "disk totals changed with CLASSIFY"); + /* check that in memory totals didn't change with classify */ + fail_unless(tot.total_innocent == 0,"memory stats changed with CLASSIFY"); + sig2 = *ctx->signature; + dspam_destroy(ctx); + /* test updating with signature after CLASSIFY */ + ctx = dspam_init(fname,DSM_ADDSPAM,DSF_CHAINED|DSF_SIGNATURE|DSF_CORPUS); + ctx->signature = &sig2; + dspam_process(ctx,NULL); + free(sig2.data); + fail_unless(ctx->totals.total_spam == 1,"total spams not 1"); + fail_unless(ctx->totals.total_innocent == 0,"total innocent not 0"); + fail_unless(ctx->totals.spam_misses == 1,"total missed not 1"); + fail_unless(ctx->totals.false_positives == 0,"total missed not 1"); + dspam_destroy(ctx); + /* not really a false positive with CORPUS flag, but... 
*/ + ctx = dspam_init(fname, + DSM_FALSEPOSITIVE,DSF_CHAINED|DSF_SIGNATURE|DSF_CORPUS); + ctx->signature = &sig1; + dspam_process(ctx,NULL); + free(sig1.data); + fail_unless(ctx->totals.total_spam == 1,0); + fail_unless(ctx->totals.total_innocent == 1,0); + fail_unless(ctx->totals.spam_misses == 1,0); + fail_unless(ctx->totals.false_positives == 0,0); + dspam_destroy(ctx); + } END_TEST + + START_TEST(test_reverse) { + DSPAM_CTX *ctx; + struct _ds_spam_signature sig1,sig2; /* signature objects */ + unlink(fname); + ctx = dspam_init(fname,DSM_PROCESS,DSF_CHAINED|DSF_SIGNATURE); + dspam_process(ctx,msg1); + sig1 = *ctx->signature; + dspam_destroy(ctx); + ctx = dspam_init(fname,DSM_PROCESS,DSF_CHAINED|DSF_SIGNATURE); + dspam_process(ctx,spam1); + sig2 = *ctx->signature; + fail_unless(ctx->totals.total_spam == 0,0); + fail_unless(ctx->totals.total_innocent == 2,0); + fail_unless(ctx->totals.spam_misses == 0,0); + fail_unless(ctx->totals.false_positives == 0,0); + dspam_destroy(ctx); + /* change our mind about spam1 */ + ctx = dspam_init(fname,DSM_ADDSPAM,DSF_CHAINED|DSF_SIGNATURE); + ctx->signature = &sig2; + dspam_process(ctx,0); + fail_unless(ctx->totals.total_spam == 1,0); + fail_unless(ctx->totals.total_innocent == 1,0); + fail_unless(ctx->totals.spam_misses == 1,0); + fail_unless(ctx->totals.false_positives == 0,0); + dspam_destroy(ctx); + /* change our mind again */ + ctx = dspam_init(fname,DSM_FALSEPOSITIVE,DSF_CHAINED); + dspam_process(ctx,spam1); + fail_unless(ctx->totals.total_spam == 0,0); + fail_unless(ctx->totals.total_innocent == 2,0); + fail_unless(ctx->totals.spam_misses == 1,0); + fail_unless(ctx->totals.false_positives == 1,0); + dspam_destroy(ctx); + /* and change our mind about msg1 */ + ctx = dspam_init(fname,DSM_ADDSPAM,DSF_CHAINED); + dspam_process(ctx,msg1); + fail_unless(ctx->totals.total_spam == 1,0); + fail_unless(ctx->totals.total_innocent == 1,0); + fail_unless(ctx->totals.spam_misses == 2,0); + fail_unless(ctx->totals.false_positives == 
1,0); + dspam_destroy(ctx); + /* test adding a signature as a corpus */ + ctx = dspam_init(fname,DSM_ADDSPAM,DSF_CHAINED|DSF_SIGNATURE|DSF_CORPUS); + ctx->signature = &sig1; + dspam_process(ctx,0); + fail_unless(ctx->totals.total_spam == 2,0); + fail_unless(ctx->totals.total_innocent == 1,0); + fail_unless(ctx->totals.spam_misses == 3,0); + fail_unless(ctx->totals.false_positives == 1,0); + dspam_destroy(ctx); + + free(sig1.data); + free(sig2.data); + } END_TEST + + /* Check that quoted printable encoded attachments are tokenized + * the same as unencoded. */ + static const char msg_7bit[] = "\ + From user@domain.com\n\ + Subject: Test message\n\ + To: testsys\n\ + Content-Type: text/plain; charset=\"us-ascii\"\n\ + Content-Transfer-Encoding: 7bit\n\ + \n\ + Testing 1 2 3\n\ + "; + + static const char msg_quopri[] = "\ + From user@domain.com\n\ + Subject: Test message\n\ + To: testsys\n\ + Content-Type: text/plain; charset=\"us-ascii\"\n\ + Content-Transfer-Encoding: quoted-printable\n\ + \n\ + T=65st=\n\ + ing 1 2 3\n\ + "; + + static const char msg_base64[] = "\ + From user@domain.com\n\ + Subject: Test message\n\ + To: testsys\n\ + Content-Type: text/plain; charset=\"us-ascii\"\n\ + Content-Transfer-Encoding: base64\n\ + \n\ + VGVzdGluZyAxIDIgMwo= + "; + + START_TEST(test_encoding) { + DSPAM_CTX *ctx; + struct _ds_spam_signature sig1,sig2,sig3; /* signature objects */ + unlink(fname); + ctx = dspam_init(fname,DSM_PROCESS, + DSF_CHAINED|DSF_SIGNATURE|DSF_IGNOREHEADER|DSF_CLASSIFY); + dspam_process(ctx,msg_7bit); + sig1 = *ctx->signature; + dspam_destroy(ctx); + ctx = dspam_init(fname,DSM_PROCESS, + DSF_CHAINED|DSF_SIGNATURE|DSF_IGNOREHEADER|DSF_CLASSIFY); + dspam_process(ctx,msg_quopri); + sig2 = *ctx->signature; + dspam_destroy(ctx); + ctx = dspam_init(fname,DSM_PROCESS, + DSF_CHAINED|DSF_SIGNATURE|DSF_IGNOREHEADER|DSF_CLASSIFY); + dspam_process(ctx,msg_base64); + sig3 = *ctx->signature; + fail_unless(sig2.length == sig1.length,0); + fail_unless(sig3.length == 
sig1.length,0); + fail_unless(memcmp(sig3.data,sig1.data,sig1.length) == 0, + "base64 decode failed"); + fail_unless(memcmp(sig2.data,sig1.data,sig1.length) == 0, + "quopri decode failed"); + free(sig1.data); + free(sig2.data); + free(sig3.data); + dspam_destroy(ctx); + } END_TEST + + /* Check that we do not try to tokenize media attachments. */ + + static const char msg_media1[] = "\ + Subject: Shipments 1099 and 1103 Benderson \n\ + To: Pina.Coloda@dada.com\n\ + X-Mailer: Lotus Notes Release 5.0.9a January 7, 2002\n\ + From: Borealis.Hernandez@dada.com\n\ + Date: Sat, 8 Nov 2003 12:33:44 -0300\n\ + 2003) at 11/08/2003 10:51:13 AM\n\ + MIME-Version: 1.0\n\ + Content-type: multipart/mixed; \n\ + Boundary=\"0__=8CBBE74BDFC6CB808f9e8a93df938690918c8CBBE74BDFC6CB80\"\n\ + Content-Disposition: inline\n\ + \n\ + --0__=8CBBE74BDFC6CB808f9e8a93df938690918c8CBBE74BDFC6CB80\n\ + Content-type: text/plain; charset=us-ascii\n\ + \n\ + I'm sending the following invoices\n\ + \n\ + \n\ + --0__=8CBBE74BDFC6CB808f9e8a93df938690918c8CBBE74BDFC6CB80\n\ + Content-type: application/pdf; \n\ + name=\"Shipments 1099 to 1103 Benderson.pdf\"\n\ + Content-Disposition: attachment;\n\ + filename=\"Shipments 1099 to 1103 Benderson.pdf\"\n\ + Content-transfer-encoding: base64\n\ + \n\ + JVBERi0xLjQNJeLjz9MNCjEgMCBvYmoNPDwgDS9UeXBlIC9DYXRhbG9nIA0vUGFnZXMgMiAwIFIg\n\ + OTk1YTY5MWExPl0NPj4Nc3RhcnR4cmVmDTI0MDk5ODQNJSVFT0YN\n\ + \n\ + --0__=8CBBE74BDFC6CB808f9e8a93df938690918c8CBBE74BDFC6CB80--\n\ + \n\ + "; + + static const char msg_media2[] = "\ + Subject: Shipments 1099 and 1103 Benderson \n\ + To: Pina.Coloda@dada.com\n\ + X-Mailer: Lotus Notes Release 5.0.9a January 7, 2002\n\ + From: Borealis.Hernandez@dada.com\n\ + Date: Sat, 8 Nov 2003 12:33:44 -0300\n\ + 2003) at 11/08/2003 10:51:13 AM\n\ + MIME-Version: 1.0\n\ + Content-type: multipart/mixed; \n\ + Boundary=\"0__=8CBBE74BDFC6CB808f9e8a93df938690918c8CBBE74BDFC6CB80\"\n\ + Content-Disposition: inline\n\ + \n\ + 
--0__=8CBBE74BDFC6CB808f9e8a93df938690918c8CBBE74BDFC6CB80\n\ + Content-type: text/plain; charset=us-ascii\n\ + \n\ + I'm sending the following invoices\n\ + \n\ + \n\ + --0__=8CBBE74BDFC6CB808f9e8a93df938690918c8CBBE74BDFC6CB80\n\ + Content-type: application/pdf; \n\ + name=\"Shipments 1099 to 1103 Benderson.pdf\"\n\ + Content-Disposition: attachment;\n\ + filename=\"Shipments 1099 to 1103 Benderson.pdf\"\n\ + Content-transfer-encoding: base64\n\ + \n\ + JVBERi0xLjQNJeLjz9MNCjEgMCBvYmfjagofyasdfXBlIC9DYXRhbG9nIA0vUGFnZXMgMiAwIFIg\n\ + DS9NZXRhZGF0YSA0NiAwIFIgDT4+IA1lbmRvYmoNMiAwIG9iag08PCANL1R5cGUgL1BhZ2VzIA0v\n\ + S2lkcyBbIDUgMCBSIDEwIDAgUiAxNCAwIFIgMTggMCBSIDIyIDAgUiAyNiAwIFIgMzAgMCBSIDM0\n\ + IDAgUiAzOCAwIFIgNDIgMCBSIA1dIA0vQ291bnQgMTAgDT4+IA1lbmRvYmoNMyAwIG9iag08PCAN\n\ + L01vZERhdGUgKEQ6MjAwMzExMDgxMDA2MzAtMDMnMDAnKQ0vQ3JlYXRpb25EYXRlIChEOjIwMDMx\n\ + MTA4MTAwNjE2LTAzJzAwJykNL1Byb2R1Y2VyIChBZG9iZSBQREYgTGlicmFyeSA1LjApDS9DcmVh\n\ + dG9yIChIUCBQREYgRm9ybWF0dGVyIHZlcnNpb24gMS4wLjAuMTQ4KQ0+PiANZW5kb2JqDTUgMCBv\n\ + YmoNPDwgDS9UeXBlIC9QYWdlIA0vTWVkaWFCb3ggWyAwIDAgNjAwLjQ4IDI3MC43MiBdIA0vUGFy\n\ + ZW50IDIgMCBSIA0vQ29udGVudHMgOCAwIFIgDS9SZXNvdXJjZXMgPDwgL1hPYmplY3QgPDwgL0lt\n\ + MCA3IDAgUiA+PiAvUHJvY1NldCBbIC9QREYgL0ltYWdlQiBdID4+IA0+PiANZW5kb2JqDTcgMCBv\n\ + YmoNPDwgL1R5cGUgL1hPYmplY3QgL1N1YnR5cGUgL0ltYWdlIC9OYW1lIC9YIC9XaWR0aCAxMjUx\n\ + NjQ0ODAxMDViZjg5MDlkYzA5ZTdiOWU5MDQ3MzY1MWQ+PGUwNzMxYWJlNDk0NTk1NjY0NDBmYTMx\n\ + OTk1YTY5MWExPl0NPj4Nc3RhcnR4cmVmDTI0MDk5ODQNJSVFT0YN\n\ + \n\ + --0__=8CBBE74BDFC6CB808f9e8a93df938690918c8CBBE74BDFC6CB80--\n\ + \n\ + "; + + START_TEST(test_mediaskip) { + DSPAM_CTX *ctx; + struct _ds_spam_signature sig1,sig2; /* signature objects */ + unlink(fname); + ctx = dspam_init(fname,DSM_PROCESS, + DSF_CHAINED|DSF_SIGNATURE|DSF_IGNOREHEADER|DSF_CLASSIFY); + dspam_process(ctx,msg_media1); + sig1 = *ctx->signature; + dspam_destroy(ctx); + ctx = dspam_init(fname,DSM_PROCESS, + DSF_CHAINED|DSF_SIGNATURE|DSF_IGNOREHEADER|DSF_CLASSIFY); + 
dspam_process(ctx,msg_media2); + sig2 = *ctx->signature; + dspam_destroy(ctx); + /* The two media msgs differ only in the media attachment, so + * the signatures should be identical. */ + fail_unless(sig2.length == sig1.length,"media skip failed"); + fail_unless(memcmp(sig2.data,sig1.data,sig1.length)==0,"media skip failed"); + free(sig1.data); + free(sig2.data); + } END_TEST + + /* Check that HTML comments do not split tokens. */ + + static const char msg_html1[] = "\ + From user@domain.com\n\ + Subject: Test message\n\ + To: testsys\n\ + Content-Type: text/html; charset=\"us-ascii\"\n\ + Content-Transfer-Encoding: 7bit\n\ + \n\ + \n\ + Buy our prescription Viagra!\n\ + \n\ + "; + + static const char msg_html2[] = "\ + From user@domain.com\n\ + Subject: Test message\n\ + To: testsys\n\ + Content-Type: text/html; charset=\"us-ascii\"\n\ + Content-Transfer-Encoding: 7bit\n\ + \n\ + \n\ + Buy our prescription Viagra!\n\ + \n\ + "; + + START_TEST(test_html) { + DSPAM_CTX *ctx; + struct _ds_spam_signature sig1,sig2; /* signature objects */ + unlink(fname); + ctx = dspam_init(fname,DSM_PROCESS,DSF_CHAINED|DSF_SIGNATURE|DSF_CLASSIFY); + dspam_process(ctx,msg_html1); + sig1 = *ctx->signature; + dspam_destroy(ctx); + ctx = dspam_init(fname,DSM_PROCESS,DSF_CHAINED|DSF_SIGNATURE|DSF_CLASSIFY); + dspam_process(ctx,msg_html2); + sig2 = *ctx->signature; + fail_unless(sig1.length == sig2.length + && memcmp(sig2.data,sig1.data,sig1.length) == 0, + "HTML comment stripping failed"); + free(sig1.data); + free(sig2.data); + dspam_destroy(ctx); + } END_TEST + + static void verify_tbt(struct tbt *tbt,int items) { + double delta = 1.0; + int cnt = 0; + struct tbt_node *node = tbt_first(tbt); + fail_unless(tbt->items == items,"tbt_add lost items"); + while (node) { + fail_unless(node->delta <= delta,"deltas not in descending order"); + delta = node->delta; + ++cnt; + node = tbt_next(node); + } + fail_unless(cnt == items,"tbt sort lost items"); + } + + /* test token delta sorting */ + 
START_TEST(test_tbt) { + struct tbt *tbt = tbt_create(); + unsigned long long crc = 0; + char buf[80]; + int i; + srandom(5551212L); + for (i = 0; i < 5000; ++i) { + double prob = (double)random() / (double)RAND_MAX; + fail_unless(prob <= 1.0 && prob >= 0.0,"problem with random() or RAND_MAX"); + tbt_add(tbt,prob,++crc); + } + verify_tbt(tbt,5000); + i = tbt_destroy(tbt); + sprintf(buf,"tbt_destroy returned %d",i); + fail_unless(i == 0,buf); + + tbt = tbt_create(); + /* worst case is that all tokens have equal delta. */ + for (i = 0; i < 2000; ++i) tbt_add(tbt,0.7,++crc); + for (i = 0; i < 2000; ++i) tbt_add(tbt,0.3,++crc); + verify_tbt(tbt,4000); + i = tbt_destroy(tbt); + sprintf(buf,"tbt_destroy returned %d",i); + fail_unless(i == 0,buf); + } END_TEST + + #ifdef TEST_TOKENIZE + + static struct lht * + tokenize(int chained,const char *msg) { + char *edup = strdup(msg); + char *p; + struct lht *freq; + if (edup == 0) return 0; + p = strstr(edup,"\n\n"); + if (p) { + *p++ = 0; + freq = _ds_tokenize(chained,edup,p); + } + else + freq = _ds_tokenize(chained," ",edup); + free(edup); + return freq; + } + + /* tokenize a simple message */ + START_TEST(test_tokenize) { + struct lht *freq; + struct lht_node *node_lht; + struct lht_c c_lht; + int tokens = 0; + + freq = tokenize(1,nasty1); + fail_unless(freq != 0,"out of memory"); + node_lht = c_lht_first(freq, &c_lht); + while (node_lht != NULL) { + char buf[256]; + sprintf(buf,"%s: %d\n",node_lht->token_name,node_lht->frequency); + if (strcmp("Ha",node_lht->token_name) == 0) + fail_unless(node_lht->frequency == 3,buf); + else if (strcmp("Ha+Ha",node_lht->token_name) == 0) + fail_unless(node_lht->frequency == 2,buf); + else + fail_unless(node_lht->frequency == 1,buf); + tokens += node_lht->frequency; + node_lht = c_lht_next(freq, &c_lht); + } + fail_unless(tokens == 32,"token count not 32"); + lht_destroy(freq); + fflush(stdout); + } END_TEST + #endif + + /* Collect all the tests. 
This will make more sense when tests are + * in multiple source files. */ + Suite *dspam_suite (void) { + Suite *s = suite_create ("DSPAM"); + TCase *tc_process = tcase_create ("PROCESS"); + + suite_add_tcase (s, tc_process); + tcase_add_test (tc_process, test_corpus); + tcase_add_test (tc_process, test_classify); + tcase_add_test (tc_process, test_overflow); + #ifdef TEST_TOKENIZE + tcase_add_test (tc_process, test_tokenize); + #endif + tcase_add_test (tc_process, test_reverse); + tcase_add_test (tc_process, test_encoding); + tcase_add_test (tc_process, test_mediaskip); + tcase_add_test (tc_process, test_html); + tcase_add_test (tc_process, test_tbt); + return s; + } + + int main (void) { + int nf; + Suite *s = dspam_suite (); + SRunner *sr = srunner_create (s); + srunner_run_all (sr, CK_NORMAL); + nf = srunner_ntests_failed (sr); + srunner_free (sr); + suite_free (s); + return (nf == 0) ? EXIT_SUCCESS : EXIT_FAILURE; + } Index: dspam/token.c diff -c /dev/null dspam/token.c:1.5 *** /dev/null Fri Jan 30 14:25:11 2004 --- dspam/token.c Sat Sep 6 17:56:31 2003 *************** *** 0 **** --- 1,378 ---- + /* + DSPAM + COPYRIGHT (C) 2003 NETWORK DWEEBS CORPORATION + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ + */ + #ifdef HAVE_CONFIG_H + #include + #endif + + #include + #include "config.h" + #include "error.h" + #include "nodetree.h" + #include "lht.h" + #include "localdb.h" + + static int _ds_process_header_token (int chained, char *joined_token, const char *previous_token, struct lht *tokens, const char *heading); + static int _ds_process_body_token (int chained, char *joined_token, const char *previous_token, struct lht *tokens); + + /* _ds_tokenize: tokenize the message + parameters: int chained true to chain tokens + char *header pointer to message header (trashed) + char *body pointer to message body (trashed) + + returns: struct lht * counted tokens + */ + + struct lht *_ds_tokenize(int chained,char *headers, char *body) { + + char *token; /* current token */ + char joined_token[32]; /* used for de-obfuscating tokens */ + char *previous_token = NULL; /* used for chained tokens */ + + char *line = NULL; /* header broken up into lines */ + char *url_body; /* urls broken up */ + + char heading[128]; /* current heading */ + int num_tokens; /* number of tokens total */ + int max_tokens; /* max # tokens we will process */ + int alloc_joined; /* track joined token free()'s */ + int i; + + /* Long Hashed Token Tree: Track tokens, frequencies, and stats */ + struct lht *freq = lht_create(1543); + struct nt *header; /* header array */ + struct nt_node *node_nt; + struct nt_c c_nt; + + joined_token[0] = 0; + + header = nt_create(NT_CHAR); + + if (freq == NULL || header == NULL) { + nt_destroy(header); + lht_destroy(freq); + LOGDEBUG("memory allocation failed"); + return NULL; + } + + /* HEADER: Split up the text into tokens, include heading */ + line = strtok(headers, "\n"); + while(line != NULL) { + nt_add(header, line); + line = strtok(NULL, "\n"); + } + + node_nt = c_nt_first(header, &c_nt); + heading[0] = 0; + + while(node_nt != NULL) { + int is_received; + joined_token[0] = 0; + alloc_joined = 0; + + line = node_nt->ptr; + token = strtok(line, ":"); + if (token != 
NULL && token[0] != 32 && token[0] != 9 && !strstr(token, " ")) { + strlcpy(heading, token, 128); + previous_token = NULL; + } + + #ifdef VERBOSE + LOGDEBUG("Reading '%s' header", heading); + #endif + + is_received = (!strcmp(heading, "Received") ? 1 : 0); + + if (is_received) + token = strtok(NULL, DELIMITERS_HEADING); + else + token = strtok(NULL, DELIMITERS); + + while(token != NULL) { + int l; + + l = strlen(token); + if ((l>2 && l<25) || (l == 2 && !strchr(token, '$') && !strchr(token, '!'))) { + + #ifdef VERBOSE + LOGDEBUG("Processing '%s' token in '%s' header", token, heading); + #endif + + /* If we had to join a token together (e.g. S E X), process it */ + if (joined_token[0] != 0) { + if (strlen(joined_token)<25 && joined_token[1]!=0) { + if (! _ds_process_header_token(chained, joined_token, previous_token, freq, heading) && chained) { + alloc_joined = 1; + previous_token = strdup(joined_token); + } + } + joined_token[0] = 0; + } + + if (! _ds_process_header_token(chained, token, previous_token, freq, heading) && chained) { + if (alloc_joined) { + free(previous_token); + alloc_joined = 0; + } + previous_token = token; + } + + } else if (l==1 || (l==2 && (strchr(token, '$') || strchr(token, '!'))) ) { + strlcat(joined_token, token, sizeof(joined_token)); + } + + if (is_received) + token = strtok(NULL, DELIMITERS_HEADING); + else + token = strtok(NULL, DELIMITERS); + } + node_nt = c_nt_next(header, &c_nt); + + if (joined_token[0] != 0) { + if (strlen(joined_token)<25 && joined_token[1]!=0) { + _ds_process_header_token(chained, joined_token, previous_token, freq, heading); + } + } + + } + + nt_destroy(header); + + if (alloc_joined) + free(previous_token); + + previous_token = NULL; + + /* BODY: Split up URLs into tokens, count frequency */ + url_body = strdup(body); + if (url_body != NULL) { + char combined_token[256]; + char *url_ptr = url_body; + int url_length; + unsigned long long crc; + + token = strstr(url_ptr, "http://"); + while(token != NULL) { + 
url_ptr = token; + + token = strtok(token, " \n\">"); + if (token != NULL) { + url_length = strlen(token); + + /* Individual tokens form the URL */ + token = strtok(token, DELIMITERS); + while(token != NULL) { + snprintf(combined_token, sizeof(combined_token), "Url*%s", token); + crc = _ds_getcrc64(combined_token); + lht_hit(freq, crc, combined_token); + token = strtok(NULL, DELIMITERS); + } + + memset(body+(url_ptr-url_body), 32, url_length); + url_ptr += url_length + 1; + token = strstr(url_ptr, "http://"); + } else + token = NULL; + } + free(url_body); + } + + url_body = strdup(body); + if (url_body != NULL) { + char combined_token[256]; + char *url_ptr = url_body; + int url_length; + unsigned long long crc; + + url_ptr = url_body; + token = strstr(url_ptr, "href=\""); + while(token != NULL) { + url_ptr = token+6; + + token = strtok(url_ptr, " \n\">"); + if (token != NULL) { + url_length = strlen(token); + + /* Individual tokens form the URL */ + token = strtok(url_ptr, DELIMITERS); + while(token != NULL) { + snprintf(combined_token, sizeof(combined_token), "Url*%s", token); + crc = _ds_getcrc64(combined_token); + lht_hit(freq, crc, combined_token); + token = strtok(NULL, DELIMITERS); + } + + memset(body+(url_ptr-url_body), 32, url_length); + + url_ptr += url_length + 1; + token = strstr(url_ptr, "href=\""); + } else + token = NULL; + } + + free(url_body); + } + + /* BODY: Split up the text into tokens, count frequency */ + joined_token[0] = 0; + alloc_joined = 0; + token = strtok(body, DELIMITERS); + while(token != NULL) { + int l = strlen(token); + if ((l>2 && l<25) || (l == 2 && !strchr(token, '$') && !strchr(token, '!'))) { + /* If we had to join a token together (e.g. S E X), process it */ + if (joined_token[0] != 0) { + if (strlen(joined_token)<25 && joined_token[1]!=0) { + if (! 
_ds_process_body_token(chained, joined_token, previous_token, freq) && chained) { + alloc_joined = 1; + previous_token = strdup(joined_token); + } + } + joined_token[0] = 0; + } + + if (! _ds_process_body_token(chained, token, previous_token, freq) && chained) { + if (alloc_joined) { + alloc_joined = 0; + free(previous_token); + } + previous_token = token; + } + } else if (l==1 || (l==2 && (strchr(token, '$') || strchr(token, '!'))) ) { + strlcat(joined_token, token, sizeof(joined_token)); + } + token = strtok(NULL, DELIMITERS); + } + + if (joined_token[0] != 0) { + if (strlen(joined_token)<25 && joined_token[1]!=0) { + _ds_process_body_token(chained, joined_token, previous_token, freq); + } + } + + return freq; + + } + + static int _ds_process_header_token (int chained, char *token, const char *previous_token, struct lht *freq, const char *heading) { + int all_num = 1, i; + int high_chars = 0; + char combined_token[256]; + int len = 0; + int is_received; + unsigned long long crc; + + is_received = (!strcmp(heading, "Received") ? 1 : 0); + + if (is_received && strlen(token)<6) + return -1; + + for(i=0;token[i]!=0;i++) { + if (! isdigit((int) token[i])) + all_num = 0; + if (token[i]>=127 || iscntrl((int) token[i])) + high_chars = 1; + } + + len = i-1; + + if (isdigit((int) token[0])) { + if ( token[len-1]!= '%') + all_num = 1; + } + + if (! 
isalnum((int) token[0]) && token[0] != '$' && token[0] != '#') + all_num = 1; + + if (is_received) + all_num = 0; + + /* Ignore tokens that are all numbers, or contain high ASCII characters */ + if (all_num || high_chars) + return -1; + + /* Ignore 'Received', 'Date', and 'Message-Id' headers */ + + if (strcasecmp(heading, "Date")) { + if (heading[0] != 0) + snprintf(combined_token, sizeof(combined_token), "%s*%s", heading, token); + else + strlcpy(combined_token, token, sizeof(combined_token)); + + crc = _ds_getcrc64(combined_token); + #ifdef VERBOSE + LOGDEBUG("Token Hit: '%s'", combined_token); + #endif + lht_hit(freq, crc, combined_token); + + if (chained && previous_token != NULL && !is_received) { + snprintf(combined_token, sizeof(combined_token), "%s*%s+%s", heading, previous_token, token); + crc = _ds_getcrc64(combined_token); + + lht_hit(freq, crc, combined_token); + } + } else { + return -1; + } + + return 0; + } + + static int _ds_process_body_token(int chained, char *token, const char *previous_token, struct lht *freq) { + int all_num = 1, i; + int high_chars = 0; + char combined_token[256]; + int len; + unsigned long long crc; + + for(i=0;token[i]!=0;i++) { + if (! isdigit((int) token[i])) + all_num = 0; + if (token[i]>=127 || iscntrl((int) token[i])) + high_chars = 1; + } + + len = i-1; + + if (isdigit((int) token[0])) { + int l = len - 1; + if ( token[l]!= '%') + all_num = 1; + } + + if (! 
isalnum((int) token[0]) && token[0] != '$' && token[0] != '#') + all_num = 1; + + /* Ignore tokens that are all numbers, or contain high ASCII characters */ + if (all_num || high_chars) + return -1; + + crc = _ds_getcrc64(token); + + lht_hit(freq, crc, token); + + if (chained && previous_token != NULL) { + snprintf(combined_token, sizeof(combined_token), "%s+%s", previous_token, token); + crc = _ds_getcrc64(combined_token); + + lht_hit(freq, crc,combined_token); + } + + return 0; + } Index: dspam/cgi/dspam.cgi diff -c dspam/cgi/dspam.cgi:1.1.1.2 dspam/cgi/dspam.cgi:1.5 *** dspam/cgi/dspam.cgi:1.1.1.2 Tue Sep 2 16:30:30 2003 --- dspam/cgi/dspam.cgi Fri Oct 24 11:56:46 2003 *************** *** 26,32 **** $CONFIG{'USERDIR'} = "/etc/mail/dspam"; $CONFIG{'ME'} = "dspam.cgi"; $CONFIG{'DOMAIN'} = "yourdomain.com"; ! $CONFIG{'DSPAM'} = "/usr/local/bin/dspam"; $CONFIG{'LARGE_SCALE'} = 0; # ## End Configuration --- 26,32 ---- $CONFIG{'USERDIR'} = "/etc/mail/dspam"; $CONFIG{'ME'} = "dspam.cgi"; $CONFIG{'DOMAIN'} = "yourdomain.com"; ! $CONFIG{'DSPAM'} = "/usr/local/bin/falsepositive"; $CONFIG{'LARGE_SCALE'} = 0; # ## End Configuration *************** *** 263,269 **** close(FILE); $DATA{'MESSAGE'} = <<_END; !
SPAM Blackhole: Email Quarantine
Click Here to Return
--- 263,269 ---- close(FILE); $DATA{'MESSAGE'} = <<_END; ! SPAM Blackhole: Email Quarantine
Click Here to Return
*************** *** 355,361 **** $DATA{'MESSAGE'} .= <<_END;   !       $start     --- 355,361 ---- $DATA{'MESSAGE'} .= <<_END;   !       $start     *************** *** 373,379 **** $DATA{'MESSAGE'} .= <<_END;
!   _END &output(%DATA); --- 373,379 ---- $DATA{'MESSAGE'} .= <<_END;
! _END &output(%DATA); *************** *** 453,459 ****

  If you have encountered a SPAM that was not caught by DSPAM ! please forward it to spam-$ENV{'REMOTE_USER'}\@$CONFIG{'DOMAIN'} where it will be contextually analyzed by our software and added to your statistical calculations. _end --- 453,459 ----

  If you have encountered a SPAM that was not caught by DSPAM ! please forward it to spam\@$CONFIG{'DOMAIN'} where it will be contextually analyzed by our software and added to your statistical calculations. _end Index: dspam/cgi/template.html diff -c dspam/cgi/template.html:1.1.1.1 dspam/cgi/template.html:1.3 *** dspam/cgi/template.html:1.1.1.1 Fri Apr 25 09:30:32 2003 --- dspam/cgi/template.html Wed Sep 10 22:45:19 2003 *************** *** 1,9 **** DSPAM v2 Control Center ! !
$HEADER$

$MESSAGE$ --- 1,9 ---- DSPAM v2 Control Center ! !
$HEADER$

$MESSAGE$ *************** *** 11,18 ****
DSPAM is a trademark of Network Dweebs Corporation
! Copyright(c) 2003, Network Dweebs Corporation. http://www.networkdweebs.com
--- 11,22 ----
+ Installed and customized by Business Management Systems + http://www.bmsi.com +

DSPAM is a trademark of Network Dweebs Corporation
! Copyright(c) 2003, Network Dweebs Corporation. ! http://www.networkdweebs.com

Index: dspam/tools/dspam_clean.c diff -c dspam/tools/dspam_clean.c:1.1.1.6 dspam/tools/dspam_clean.c:1.6 *** dspam/tools/dspam_clean.c:1.1.1.6 Tue Sep 2 16:30:30 2003 --- dspam/tools/dspam_clean.c Tue Sep 2 17:03:42 2003 *************** *** 130,136 **** snprintf(filename, MAX_FILENAME_LENGTH, "%s/%s", directory, entry->d_name); db_create(&dict, NULL, 0); ! ret = dict->open(dict, NULL, filename, NULL, DB_BTREE, 0, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); if (ret) { file_error(ERROR_FILE_WRITE, filename, db_strerror(ret)); _ds_file_unlock(lock); --- 130,136 ---- snprintf(filename, MAX_FILENAME_LENGTH, "%s/%s", directory, entry->d_name); db_create(&dict, NULL, 0); ! ret = dict->open(dict, TRANID_PLACEHOLDER filename, NULL, DB_BTREE, 0, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); if (ret) { file_error(ERROR_FILE_WRITE, filename, db_strerror(ret)); _ds_file_unlock(lock); *************** *** 141,147 **** snprintf(filename, MAX_FILENAME_LENGTH, "%s/%s.new", directory, entry->d_name); db_create(&newdict, NULL, 0); ! ret = newdict->open(newdict, NULL, filename, NULL, DB_BTREE, DB_CREATE, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); if (ret) { file_error(ERROR_FILE_WRITE, filename, db_strerror(ret)); _ds_file_unlock(lock); --- 141,147 ---- snprintf(filename, MAX_FILENAME_LENGTH, "%s/%s.new", directory, entry->d_name); db_create(&newdict, NULL, 0); ! ret = newdict->open(newdict, TRANID_PLACEHOLDER filename, NULL, DB_BTREE, DB_CREATE, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); if (ret) { file_error(ERROR_FILE_WRITE, filename, db_strerror(ret)); _ds_file_unlock(lock); Index: dspam/tools/dspam_convert.c diff -c dspam/tools/dspam_convert.c:1.1.1.4 dspam/tools/dspam_convert.c:1.3 *** dspam/tools/dspam_convert.c:1.1.1.4 Tue Sep 2 16:30:30 2003 --- dspam/tools/dspam_convert.c Tue Sep 2 17:03:42 2003 *************** *** 118,124 **** snprintf(filename, MAX_FILENAME_LENGTH, "%s/%s", USERDIR, entry->d_name); db_create(&dict, NULL, 0); ! 
ret = dict->open(dict, NULL, filename, NULL, DB_BTREE, 0, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); if (ret) { file_error(ERROR_FILE_WRITE, filename, db_strerror(ret)); _ds_file_unlock(lock); --- 118,124 ---- snprintf(filename, MAX_FILENAME_LENGTH, "%s/%s", USERDIR, entry->d_name); db_create(&dict, NULL, 0); ! ret = dict->open(dict, TRANID_PLACEHOLDER filename, NULL, DB_BTREE, 0, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); if (ret) { file_error(ERROR_FILE_WRITE, filename, db_strerror(ret)); _ds_file_unlock(lock); *************** *** 144,150 **** } */ ! ret = newdict->open(newdict, NULL, filename, NULL, DB_BTREE, DB_CREATE, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); if (ret) { file_error(ERROR_FILE_WRITE, filename, db_strerror(ret)); _ds_file_unlock(lock); --- 144,150 ---- } */ ! ret = newdict->open(newdict, TRANID_PLACEHOLDER filename, NULL, DB_BTREE, DB_CREATE, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); if (ret) { file_error(ERROR_FILE_WRITE, filename, db_strerror(ret)); _ds_file_unlock(lock); Index: dspam/tools/dspam_corpus diff -c /dev/null dspam/tools/dspam_corpus:1.1.1.1 *** /dev/null Fri Jan 30 14:25:11 2004 --- dspam/tools/dspam_corpus Tue Jun 3 16:36:22 2003 *************** *** 0 **** --- 1,44 ----
#!/usr/bin/perl

use strict;
use vars qw { $USER $IS_SPAM @buffer $line $file };

# dspam_corpus: small tool to automatically add a corpus of mail to a dictionary

# Syntax: dspam_corpus [username] [filename] [--addspam]
# Flags: --addspam corpus is known spam

my($USER) = shift;
$file = shift;

if ($USER eq "" || $file eq "") {
  print "Syntax: $0 [user] [filename] [--addspam]\n";
  exit;
}

$line = 0;

my($IS_SPAM) = shift;

open(FILE, "<$file") || die "$file: $!";
# Read the mbox line by line; a "From " line after the first line starts a
# new message, so hand the buffered one to dspam before collecting the next.
while(<FILE>) {
  if (/^From / && $line>0) {
    call_dspam();
  }
  push(@buffer, $_);
  $line++;
}

# Flush the last message of the file.
print "Calling DSPAM...\n";
call_dspam();
close(FILE);

# Pipe the buffered message to dspam in corpus mode and empty the buffer.
sub call_dspam {
  print "Calling DSPAM...\n";
  open(PIPE, "|/usr/local/bin/dspam --corpus -d $USER $IS_SPAM") || die $!;
  foreach(@buffer) {
    print PIPE $_;
  }
  # Close the handle that was opened above so dspam sees end-of-input and is
  # waited for before the next message is sent.
  close(PIPE);
  @buffer = ( );
}
Index: dspam/tools/dspam_dump.c diff -c dspam/tools/dspam_dump.c:1.1.1.5 dspam/tools/dspam_dump.c:1.4 *** dspam/tools/dspam_dump.c:1.1.1.5 Sat Sep 6 17:40:09 2003 --- dspam/tools/dspam_dump.c Sat Sep 6 17:56:31 2003 *************** *** 105,111 **** } */ ! ret = dbp->open(dbp, NULL, filename, NULL, DB_BTREE, DB_RDONLY, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); if (ret) { fprintf(stderr, "unable to open %s for reading: %s\n", filename, db_strerror(ret)); return ret; --- 105,111 ---- } */ ! ret = dbp->open(dbp, TRANID_PLACEHOLDER filename, NULL, DB_BTREE, DB_RDONLY, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); if (ret) { fprintf(stderr, "unable to open %s for reading: %s\n", filename, db_strerror(ret)); return ret; Index: dspam/tools/dspam_genaliases diff -c /dev/null dspam/tools/dspam_genaliases:1.1.1.1 *** /dev/null Fri Jan 30 14:25:11 2004 --- dspam/tools/dspam_genaliases Tue Sep 2 16:30:30 2003 *************** *** 0 **** --- 1,17 ---- + #!/usr/bin/perl + + use strict; + use vars qw { $USER $OUTPUT $DSPAM $MIN_UID}; + + $MIN_UID = 500; + $DSPAM = "/usr/local/bin/dspam"; + $OUTPUT = "/tmp/dspam.aliases"; + + open(FILE, "$OUTPUT") || die $!; + while() { + ($USER) = split(/\:/); + print OUT qq!spam-$USER:\t"|$DSPAM -d $USER --addspam"\n!; + } + close(OUT); + close(FILE); Index: dspam/tools/dspam_purge.c diff -c dspam/tools/dspam_purge.c:1.1.1.4 dspam/tools/dspam_purge.c:1.3 *** dspam/tools/dspam_purge.c:1.1.1.4 Tue Sep 2 16:30:30 2003 --- dspam/tools/dspam_purge.c Tue Sep 2 17:03:42 2003 *************** *** 151,157 **** return -1; } ! ret = dict->open(dict, NULL, filename, NULL, DB_BTREE, 0, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); if (ret) { file_error(ERROR_FILE_WRITE, filename, db_strerror(ret)); _ds_file_unlock(lock); --- 151,157 ---- return -1; } ! 
ret = dict->open(dict, TRANID_PLACEHOLDER filename, NULL, DB_BTREE, 0, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); if (ret) { file_error(ERROR_FILE_WRITE, filename, db_strerror(ret)); _ds_file_unlock(lock); *************** *** 162,168 **** snprintf(filename, MAX_FILENAME_LENGTH, "%s.new", file); db_create(&newdict, NULL, 0); ! ret = newdict->open(newdict, NULL, filename, NULL, DB_BTREE, DB_CREATE, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); if (ret) { file_error(ERROR_FILE_WRITE, filename, db_strerror(ret)); _ds_file_unlock(lock); --- 162,168 ---- snprintf(filename, MAX_FILENAME_LENGTH, "%s.new", file); db_create(&newdict, NULL, 0); ! ret = newdict->open(newdict, TRANID_PLACEHOLDER filename, NULL, DB_BTREE, DB_CREATE, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); if (ret) { file_error(ERROR_FILE_WRITE, filename, db_strerror(ret)); _ds_file_unlock(lock); Index: dspam/tools/dspam_stats.c diff -c dspam/tools/dspam_stats.c:1.1.1.6 dspam/tools/dspam_stats.c:1.6 *** dspam/tools/dspam_stats.c:1.1.1.6 Tue Sep 2 16:30:30 2003 --- dspam/tools/dspam_stats.c Tue Sep 2 17:03:42 2003 *************** *** 156,162 **** } */ ! ret = dbp->open(dbp, NULL, filename, NULL, DB_BTREE, DB_RDONLY, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); if (ret) { fprintf(stderr, "unable to open %s for reading: %s\n", filename, db_strerror(ret)); return ret; --- 156,162 ---- } */ ! ret = dbp->open(dbp, TRANID_PLACEHOLDER filename, NULL, DB_BTREE, DB_RDONLY, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); if (ret) { fprintf(stderr, "unable to open %s for reading: %s\n", filename, db_strerror(ret)); return ret;