summary refs log tree commit diff
path: root/tools/skrewt.c
diff options
context:
space:
mode:
author John Denker <jsd@av8n.com> 2012-07-13 18:25:21 (GMT)
committer John Denker <jsd@av8n.com> 2012-07-14 01:39:52 (GMT)
commit 0d4a1c9ffb636d84c180a703c4070be44bf95f51 (patch)
tree 90cce82d83978d5da2260566ba8c52e75edc8b7b /tools/skrewt.c
parent 6fb7c9b8826101dc1fc415df3fe94252e83d02e4 (diff)
add skrewt to filter-chain
Diffstat (limited to 'tools/skrewt.c')
-rw-r--r-- tools/skrewt.c 198
1 files changed, 198 insertions, 0 deletions
diff --git a/tools/skrewt.c b/tools/skrewt.c
new file mode 100644
index 0000000..117981b
--- /dev/null
+++ b/tools/skrewt.c
@@ -0,0 +1,198 @@
+///////////////////
+// skrewt.c
+//
+// scrutinize email
+//
+
+#include <iostream>
+#include <stdlib.h> /* for exit() */
+#include <string> /* for std::string */
+#include <ctype.h> /* toupper */
+
+using namespace std;
+
+// Print the command-line help text and terminate the process.
+//   sts: exit status.  Zero sends the text to stdout; any nonzero value
+//        sends it to stderr instead.
+// Does not return: finishes by calling exit(sts).
+void usage(const int sts){
+  static const char help_text[] =
+"Usage: skrewt [options]\n"
+"\n"
+" Scrutinizes email. Reads stdin, copies it to stdout.\n"
+" Exit result 0 means good, 1 means rejection (spam).\n"
+" Writes reason for rejection to stderr.\n"
+"\n"
+" Typically used as a filter in a pipeline, along with spamc -E\n"
+" Options\n"
+" -h print this msg (and exit immediately).\n"
+"\n"
+" Messages containing the string '-please-bounce-this-' will be rejected.\n"
+" Messages with no date will be rejected.\n"
+  ;
+  // Pick the destination stream once, then emit the whole text in one go.
+  std::ostream& out = sts ? std::cerr : std::cout;
+  out << help_text;
+  exit(sts);
+}
+
+/////////////////////////////////////////////////////////
+// Case insensitive comparison of strings
+
+// Function object implementing a case-insensitive "less than" on strings.
+// Characters are folded with toupper() and compared as unsigned byte
+// values; when one string is a prefix of the other, the shorter string
+// is the lesser one.
+class lessthan_foldcase{
+public:
+  // Returns true when a sorts strictly before b, ignoring case.
+  bool operator() (const std::string& a, const std::string& b) const {
+    const size_t a_len = a.length();
+    const size_t b_len = b.length();
+
+    const size_t lim = a_len < b_len ? a_len : b_len;
+
+    for (size_t i = 0; i < lim; ++i)
+    {
+      // toupper() is only defined for arguments representable as
+      // unsigned char (or EOF).  Plain char may be negative for bytes
+      // >= 0x80, so convert before folding and compare the int results.
+      const int cha = toupper(static_cast<unsigned char>(a[i]));
+      const int chb = toupper(static_cast<unsigned char>(b[i]));
+
+      if (cha < chb) return true;
+      if (cha > chb) return false;
+    }
+    // here if one is an extension of the other
+    return a_len < b_len;
+  }
+};
+
+
+// Three-way comparison of two strings in alphabetical order, ignoring case.
+// Like perl cmp operator, but ignores case.
+// Returns:
+//   -2 or  2  a is less / greater than b at the first differing character
+//   -1 or  1  the shorter string is a prefix of the other
+//             (-1: b is longer, 1: a is longer)
+//    0        the strings are the same apart from case
+int cmp_casefold(const std::string& a, const std::string& b) {
+  std::string::const_iterator aa = a.begin();
+  std::string::const_iterator bb = b.begin();
+  while (aa != a.end() && bb != b.end()){
+    // tolower() is only defined for arguments representable as unsigned
+    // char (or EOF); plain char may be negative for bytes >= 0x80.
+    const int ca = tolower(static_cast<unsigned char>(*aa++));
+    const int cb = tolower(static_cast<unsigned char>(*bb++));
+    if (ca != cb) return ca < cb ? -2 : 2;
+  }
+  if (aa != a.end()) return 1;    // a is longer
+  if (bb != b.end()) return -1;   // b is longer
+  return 0;
+}
+
+
+// Return a copy of a with every character passed through tolower().
+// The argument itself is not modified.
+std::string toLower(const std::string& a){
+  std::string rslt = a;
+  for (std::string::iterator rr = rslt.begin(); rr != rslt.end(); ++rr){
+    // tolower() is only defined for arguments representable as unsigned
+    // char (or EOF); plain char may be negative for bytes >= 0x80.
+    *rr = static_cast<char>(tolower(static_cast<unsigned char>(*rr)));
+  }
+  return rslt;
+}
+
+
+// Return foo with leading whitespace (space, tab, CR, LF) removed.
+// A string that is empty or consists only of such whitespace yields
+// an empty string.
+std::string ltrim(std::string foo){
+  // Keep the full size_type: storing the result in a narrower unsigned
+  // type would truncate npos on platforms where size_t is wider than
+  // unsigned, so the "not found" test below would never match.
+  const std::string::size_type where = foo.find_first_not_of(" \t\r\n");
+  if (where == std::string::npos) return std::string();
+  return foo.substr(where);
+}
+
+
+
+// Entry point: copy a mail message from stdin to stdout while checking it.
+//
+// Any command-line argument prints the usage text and exits (status 0 for
+// "-h", status 1 for anything else).
+//
+// Headers are read one logical header at a time: lines that begin with a
+// space or tab are folded into the preceding header.  An empty header line
+// ends the header section.  In the body, a line equal to "--" + boundary
+// switches back to header parsing for the next part.
+//
+// Exits with status 1, after writing a reason to stderr, when
+//   - a header section ends and no Date: header has been seen,
+//   - a Subject: header contains "-please-bounce-this-",
+//   - the input ends and no body line with content left after ltrim() was
+//     seen while the current content type was text/* (text is assumed
+//     until a Content-Type header says otherwise).
+// Returns 1 without a message if the input stream goes bad, 0 otherwise.
+int main(int argc, char** argv){
+  if (argc > 1) {
+    if (argv[1] == string("-h")) usage(0);
+    usage(1);
+  }
+
+  int inheads(1);               // nonzero while reading a header section
+  string boundary("x-xx-x");    // placeholder until a boundary= is seen
+  int text_type(1);             // nonzero while the current type is text/*
+  int textlines(0);             // body lines counted as text so far
+  int gotdate(0);               // number of Date: headers seen
+  for (;;){
+    if (cin.eof()) break;
+    if (cin.bad()) return 1;
+    if (inheads) {
+      string header;
+      if (getline(cin, header).fail()) continue;
+      // Fold continuation lines: look at the first character of the next
+      // line, put it back, and keep appending while it is a space or tab.
+      for (;;) {
+        if (cin.eof()) break;
+        if (cin.bad()) return 1;
+        char ch;
+        if (cin.get(ch).fail()) continue;
+        cin.putback(ch);
+        if (ch != ' ' && ch != '\t') break;
+        string line;
+        if (getline(cin, line).fail()) continue;
+        header += "\n" + line;
+      }
+      if (header.length() == 0) {
+        // Empty line: end of this header section.
+        if (!gotdate) {
+          cerr << "skrewt rejection: no date" << endl;
+          exit(1);              // disallow mail with no date
+        }
+        inheads = 0;
+      }
+      else {
+        string headword;
+        string rest;
+        // string::size_type, not unsigned int: a narrower type truncates
+        // npos where size_t is wider, making the npos tests always fail.
+        const string::size_type colon = header.find(":");
+        if (colon != string::npos) {
+          headword = header.substr(0, colon);
+          rest = ltrim(header.substr(1+colon));
+        }
+        headword = toLower(headword);
+        if (headword == "content-type") {
+          // The media type is the value up to the first separator.
+          string the_type = rest;
+          const string::size_type type_end = the_type.find_first_of(" \t;\n");
+          if (type_end != string::npos) {
+            the_type = the_type.substr(0, type_end);
+          }
+          the_type = toLower(the_type);
+          text_type = (the_type.find("text/") == 0);
+          if (0) cerr << "type (" << the_type << ") "
+                      << (text_type?"text":"nope") << endl;
+          // MIME parameter names are case-insensitive, so locate
+          // "boundary=" in a lowercased copy; toLower() keeps the length,
+          // so the offset is valid in the original header, from which the
+          // (case-sensitive) value is taken.
+          const string srch = "boundary=";
+          string::size_type where = toLower(header).find(srch);
+          if (where != string::npos) {
+            where += srch.length();
+            boundary = header.substr(where);
+            if (!boundary.empty() && boundary[0] == '"') {
+              // Quoted value: runs up to the closing quote.
+              boundary = boundary.substr(1);
+              where = boundary.find_first_of("\"");
+            } else {
+              // Bare value: runs up to the next separator.
+              where = boundary.find_first_of(" \t;\n");
+            }
+            if (where != string::npos) {
+              boundary = boundary.substr(0, where);
+            }
+            /* otherwise the boundary is the whole remainder */
+          }
+        } else if (headword == "date") {
+          gotdate++;
+        } else if (headword == "subject") {
+          if (rest.find("-please-bounce-this-") != string::npos) {
+            cerr << "skrewt rejection: by request" << endl;
+            exit(1);
+          }
+        }
+      }
+      cout << header << endl;
+    } else {
+      string line;
+      if (!getline(cin, line).fail()) {
+        if (line == "--" + boundary) {
+          // Part delimiter: the next lines are that part's headers.
+          inheads = 1;
+        } else {
+          if (text_type) {
+            if (ltrim(line).length()) textlines++;
+          }
+        }
+        cout << line << endl;
+      }
+    }
+  }
+  if (0) cerr << "textlines: " << textlines << endl;
+  if (!textlines) {
+    cerr << "skrewt rejection: no text" << endl;
+    exit(1);
+  }
+  return 0;
+}