summaryrefslogtreecommitdiff
path: root/tools/mail-scan.c
diff options
context:
space:
mode:
author John Denker <jsd@av8n.com> 2012-07-15 13:43:36 (GMT)
committer John Denker <jsd@av8n.com> 2012-07-15 13:43:36 (GMT)
commit ffc4b5ce605253957b2a0ce82cf924c669806590 (patch)
tree 0c312a3c58a91700893bc3ad6548d730ea9b2a37 /tools/mail-scan.c
parent b0f78b272c97b7cc3ff1fbe6c48fd2151d039613 (diff)
primitive grep-like thing for looking at mail headers
Diffstat (limited to 'tools/mail-scan.c')
-rw-r--r-- tools/mail-scan.c 219
1 files changed, 219 insertions, 0 deletions
diff --git a/tools/mail-scan.c b/tools/mail-scan.c
new file mode 100644
index 0000000..1260f30
--- /dev/null
+++ b/tools/mail-scan.c
@@ -0,0 +1,219 @@
+///////////////////
+// mail-scan.c
+//
+// primitive grep-like thing for looking at mail headers
+//
+
+#include <iostream>
+#include <stdlib.h> /* for exit() */
+#include <string>
+#include <list>
+#include <ctype.h> /* toupper */
+#include <signal.h>
+#include <fstream>
+
+#include <stdio.h> /* perror */
+
+using namespace std;
+
+// Prints the help text and terminates the process.
+//
+// sts: status handed to exit().  A nonzero status sends the text to
+//      stderr; zero sends it to stdout.
+// Never returns.
+void usage(const int sts){
+  (sts ? std::cerr : std::cout) <<
+"Usage: mail-scan [options] [+headword ...] [file ...]\n"
+"\n"
+"  Primitive grep-like thing for looking at mail headers.\n"
+"  Reads the header section of each file and prints every header\n"
+"  whose field name equals one of the +headwords (ignoring case),\n"
+"  in the form 'file :: header'.\n"
+"\n"
+"  Options\n"
+"    -help            print this msg (and exit immediately).\n"
+"\n"
+"  Example:\n"
+"    mail-scan +subject +from msg1 msg2\n"
+;
+  exit(sts);
+}
+
+// Process exit statuses.  The values follow the spamassassin
+// convention; they are not the ones qmail-queue expects.
+const int sa_good  = 0;
+const int sa_spam  = 1;
+const int sa_usage = 64;
+
+/////////////////////////////////////////////////////////
+// Case insensitive comparison of strings
+//
+// Function object: operator() returns true when a sorts before b once
+// both have been folded to upper case.  When one string is a leading
+// part of the other, the shorter one sorts first; equal strings (up to
+// case) compare false in both directions.
+class lessthan_foldcase{
+public:
+  bool operator() (const std::string& a, const std::string& b) const {
+    const size_t a_len = a.length();
+    const size_t b_len = b.length();
+    const size_t lim = a_len < b_len ? a_len : b_len;
+
+    for (size_t i = 0; i < lim; ++i) {
+      // toupper() is only defined for EOF and for values representable
+      // as unsigned char, so a plain (possibly negative) char has to be
+      // converted first.  Comparing the int results also keeps bytes
+      // above 0x7f ordered after ASCII regardless of char signedness.
+      const int cha = toupper(static_cast<unsigned char>(a[i]));
+      const int chb = toupper(static_cast<unsigned char>(b[i]));
+
+      if (cha < chb) return true;
+      if (cha > chb) return false;
+    }
+    // here if one is an extension of the other (or they are equal)
+    return a_len < b_len;
+  }
+};
+
+
+// Returns negative if a is less than b in alphabetical order
+// returns 0 if they are the same, or positive if a is greater.
+// Like perl cmp operator, but ignores case.
+//
+// The magnitude tells how the decision was reached: -2 / 2 means the
+// strings differ at some character, -1 / 1 means one string is a
+// leading part of the other (the shorter one is the lesser).
+int cmp_casefold(const std::string& a, const std::string& b) {
+  std::string::const_iterator aa = a.begin();
+  std::string::const_iterator bb = b.begin();
+  while (aa != a.end() && bb != b.end()){
+    // Go through unsigned char: calling tolower() with a negative
+    // char value is undefined.
+    const int ca = tolower(static_cast<unsigned char>(*aa++));
+    const int cb = tolower(static_cast<unsigned char>(*bb++));
+    if (ca != cb) return ca < cb ? -2 : 2;
+  }
+  if (aa != a.end()) return 1;          // a is longer
+  if (bb != b.end()) return -1;         // b is longer
+  return 0;
+}
+
+
+// Returns a copy of a in which every character has been passed
+// through tolower().  The argument itself is left untouched.
+std::string toLower(const std::string& a){
+  std::string rslt = a;
+  for (std::string::iterator rr = rslt.begin(); rr != rslt.end(); ++rr){
+    // tolower() is undefined for negative char values, so convert to
+    // unsigned char on the way in and back to char on the way out.
+    *rr = static_cast<char>(tolower(static_cast<unsigned char>(*rr)));
+  }
+  return rslt;
+}
+
+////////////////
+// Returns foo with any leading blanks, tabs, carriage returns and
+// newlines removed.  A string made up only of those characters (or an
+// empty string) yields the empty string.
+std::string ltrim(std::string foo){
+  const std::string::size_type where = foo.find_first_not_of(" \t\r\n");
+  // npos means there is nothing but whitespace: everything is trimmed.
+  if (where == std::string::npos) return std::string();
+  return foo.substr(where);
+}
+
+////////////////
+// Argument-parsing helper: tells whether 'shorter' is a leading
+// substring of 'longer'.  Returns 1 when it is, 0 when it is not.
+// An empty 'shorter' is a leading substring of every string.
+//
+int prefix(const std::string shorter, const std::string longer){
+  const std::string::size_type span = shorter.length();
+  // compare() clips the span to the end of 'longer', exactly as
+  // substr(0, span) would.
+  return longer.compare(0, span, shorter) == 0;
+}
+
+// Terminates the process with status sts.
+//
+// When sts is not sa_good and the environment variable HI_Q_GROUP is
+// set, SIGUSR1 is first sent to the process group that variable names.
+// Failures along the way are reported on stderr but never change the
+// exit status.  Never returns.
+void exeunt(const int sts){
+  if (sts == sa_good) exit(sts);
+
+  const char* foo = getenv("HI_Q_GROUP");
+  if (!foo) exit(sts);
+
+  // Parse strictly rather than trusting atoi(): a garbled value would
+  // come back as 0, and kill(0, ...) signals our own process group,
+  // while kill(-1, ...) signals every process we are allowed to
+  // signal.  Neither may ever be reached by accident.
+  char* end = 0;
+  const long parsed = strtol(foo, &end, 10);
+  const int group = static_cast<int>(parsed);
+  if (end == foo || parsed <= 1 || group != parsed) {
+    std::cerr << "ignoring invalid HI_Q_GROUP '" << foo << "'" << std::endl;
+    exit(sts);
+  }
+
+// No point in signalling ourself:
+  void (*rslt)(int) = signal(SIGUSR1, SIG_IGN);
+  if (rslt == SIG_ERR) {
+    std::cerr << "error setting signal" << std::endl;
+  }
+  // A negative pid addresses the whole process group.
+  const int k = kill(-group, SIGUSR1);
+  if (k) {
+    std::cerr << "kill failed on group " << group << " ... ";
+    perror(0);
+  }
+  exit(sts);
+}
+
+////////////////////////////////////////////////////////////
+// Scans the header section of one already-opened mail file and prints
+// every header whose field name is one of the watchwords.
+//
+// infile:    stream positioned at the start of the message.
+// file:      name printed in front of each matching header.
+// watchword: field names to look for; must already be lower case.
+// maxsize:   byte limit checked while continuation lines are appended;
+//            going over it reports a rejection and calls
+//            exeunt(sa_spam).
+// Returns false if the stream reported a hard error (badbit).
+static bool scan_headers(std::istream& infile, const std::string& file,
+                         const std::list<std::string>& watchword,
+                         const int maxsize){
+  int msgsize(0);
+  std::string header;
+  while (std::getline(infile, header)) {
+    msgsize += header.length()+1;
+
+    // A blank line ends the header section, even when the body that
+    // follows happens to start with a blank or a tab.  A lone CR is
+    // what a blank line looks like in a file with CRLF line ends.
+    if (header.empty() || header == "\r") break;
+
+    // Unfold: a following line that starts with a blank or a tab is a
+    // continuation of the current header.
+    for (;;) {
+      const int next = infile.peek();
+      if (next != ' ' && next != '\t') break;   // also taken at end of file
+      std::string line;
+      if (!std::getline(infile, line)) break;
+      msgsize += line.length()+1;
+      if (msgsize > maxsize) {
+        std::cerr << "skrewt rejection: bigger than " << maxsize << std::endl;
+        exeunt(sa_spam);
+      }
+      header += "\n" + line;
+    }
+
+    // The field name is everything before the first colon; a line
+    // without a colon has an empty field name.
+    std::string headword;
+    const std::string::size_type where = header.find(':');
+    if (where != std::string::npos) {
+      headword = toLower(header.substr(0, where));
+    }
+    for (std::list<std::string>::const_iterator ptr = watchword.begin();
+         ptr != watchword.end(); ++ptr) {
+      if (headword == *ptr) {
+        std::cout << file << " :: " << header << std::endl;
+      }
+    }
+  }
+  return !infile.bad();
+}
+
+// Entry point.
+//
+// Arguments are taken in order:
+//   -help (may be abbreviated, may use "--")  print usage and exit 0
+//   any other argument starting with "-"      usage error, exit sa_usage
+//   +word                                     add 'word' to the watchwords
+//   anything else                             name of a file to scan
+// All arguments are parsed before the first file is opened.
+//
+// Returns 0 on success, 1 if a file could not be opened or a read
+// error occurred.  A file that cannot be opened is reported on stderr
+// and the remaining files are still scanned.
+int main(int _argc, const char** _argv){
+  int argc(_argc);
+  const char **argv(_argv);
+  std::string progname("mail-scan");
+  if (argc > 0) {               // argv[0] is not guaranteed to exist
+    progname = *argv; argv++; argc--;
+  }
+
+  const int maxsize(1000000);
+  std::list<std::string> watchword;     // stored lower-cased
+  std::list<std::string> dofile;
+
+  while (argc > 0) {
+    std::string arg(*argv); argv++; argc--;
+    if (arg.substr(0,2) == "--") arg = arg.substr(1);
+    if (prefix(arg, "-help")) {
+      usage(0);
+    }
+    if (arg.substr(0,1) == "-") {
+      std::cerr << "Unrecognized option '" << arg << "'" << std::endl;
+      std::cerr << "For help, try: " << progname << " -help" << std::endl;
+      exit(sa_usage);
+    }
+    if (arg.substr(0,1) == "+") {
+      // Fold case once here instead of on every header comparison.
+      watchword.push_back(toLower(arg.substr(1)));
+    } else {
+      dofile.push_back(arg);
+    }
+  }
+
+  int status(0);
+  for (std::list<std::string>::const_iterator file = dofile.begin();
+       file != dofile.end(); ++file) {
+    std::ifstream infile(file->c_str());
+    // A failed open sets failbit, not badbit, so test the stream as a
+    // whole; reading from it anyway would never reach end-of-file.
+    if (!infile) {
+      std::cerr << "Failed to open file: " << *file << std::endl;
+      status = 1;
+      continue;
+    }
+    if (!scan_headers(infile, *file, watchword, maxsize)) return 1;
+  }
+  return status;
+}