summary refs log tree commit diff
diff options
context:
space:
mode:
author John Denker <jsd@av8n.com> 2012-11-24 17:03:18 (GMT)
committer John Denker <jsd@av8n.com> 2012-11-24 17:03:18 (GMT)
commit 7abce5d60408c7713181249ba3f23b72a40aa326 (patch)
tree f89f7c75bdd955015849d2ac793169b9d2cdf27c
parent 3043e470e483da2452943bd810256ea0ff8807b0 (diff)
new libskrewt-based code is now stable and in use;
let it be the basis for further developments
-rw-r--r-- tools/makefile 2
-rw-r--r-- tools/skrewt.c 498
2 files changed, 22 insertions, 478 deletions
diff --git a/tools/makefile b/tools/makefile
index fa3c689..2373125 100644
--- a/tools/makefile
+++ b/tools/makefile
@@ -61,7 +61,7 @@ fixown2: fixown.o utils.o
pipette: pipette.o utils.o
$(CC) $^ -o $@
-skrewt: skrewt.o utils.o sepofra.o
+skrewt: skrewt.o libskrewt.o utils.o sepofra.o
$(CC) $^ -lboost_filesystem-mt -lboost_system -lspf2 -o $@
./fixown $@
diff --git a/tools/skrewt.c b/tools/skrewt.c
index 63c6be6..abea289 100644
--- a/tools/skrewt.c
+++ b/tools/skrewt.c
@@ -8,7 +8,6 @@
#include <stdlib.h> /* for exit() */
#include <string> /* for strcmp() */
#include <ctype.h> /* toupper */
-#include <signal.h>
#include <stdio.h> /* perror */
#include <sstream>
@@ -37,33 +36,8 @@ void usage(const int sts){
exit(sts);
}
-#include "qq_exit_codes.h"
+#include "libskrewt.h"
#include "utils.h"
-#include "sepofra.h"
-
-void maybe_exeunt(const int sts, const int really){
- if (!really) return;
- if (sts == ex_good) exit(sts);
-
- const char* foo = getenv("HI_Q_GROUP");
- if (!foo) exit(sts);
-
-// No point in signalling ourself:
- sighandler_t rslt = signal(SIGUSR1, SIG_IGN);
- if (rslt == SIG_ERR) {
- cerr << "error setting signal" << endl;
- }
- int k = kill(-atoi(foo), SIGUSR1);
- if (k) {
- cerr << "kill failed on group " << atoi(foo) << " ... ";
- perror(0);
- }
- exit(sts);
-}
-
-void exeunt(const int sts){
- maybe_exeunt(sts, 1);
-}
string progname, progid;
int mypid;
@@ -72,87 +46,6 @@ int mypid;
/* Content-Type: multipart/mixed; boundary="1170861315-1262462055-1341954763=:92165" */
//
-void parse_content(const string type_spec_line,
- string &maintype, string &boundary) {
- //xxx cerr << "parser called with: " << type_spec_line << endl;
- string get_type(type_spec_line);
-
- size_t where = get_type.find_first_of(" \t;\n");
- string rest;
- if (where == string::npos) {
- // keep whole string
- }
- else {
- rest = get_type.substr(where+1);
- get_type = get_type.substr(0,where);
- }
- where = get_type.find("/");
- if (where == string::npos){
- maintype = "";
- cerr << "could not find / in " << get_type << endl;
- } else {
- maintype = get_type.substr(0, where);
- }
-
-// now need to find boundary
-
- string srch = "boundary=";
- where = rest.find(srch);
- if (where != string::npos) {
- where += srch.length();
- boundary = rest.substr(where);
- if (boundary[0] == '"') {
- boundary = boundary.substr(1);
- where = boundary.find_first_of("\"");
- } else {
- where = boundary.find_first_of(" \t;\n");
- }
- if (where == string::npos) {
- /* do nothing, boundary=boundary as a whole */
- } else {
- boundary = boundary.substr(0, where);
- }
- } else {
- //xxxxxxx cerr << "boundary= not found in " << type_spec_line << endl;
- }
-}
-
-class skrewt{
-public:
- string received_from; // envelope HELO among other things
- string proximta_HELO;
- string proximta_rDNS;
- string proximta_IP;
- string proximta_AuthUser;
- string return_path; // envelope MAIL FROM
- string boundary;
- string to;
- string from;
- string subject;
- string date;
- string message_id;
- string content_type;
- string delivered_to;
- int msgsize;
- vector<string> bigbuf;
- int saw_blank_line;
- int recno;
-
- int maxsize;
- int error_exit;
- int mid_required;
-
- // constructor
- skrewt()
- : boundary("x-xx-x"), msgsize(0), saw_blank_line(0), recno(0),
- maxsize(1000*1000), error_exit(0), mid_required(0)
- {}
-
- int headers();
- int interstage();
- int body();
- int krunch_rfrom();
-};
#if 0 /* typical "Received: from" lines */
Received: from lists.sourceforge.net (216.34.181.88)
@@ -178,360 +71,6 @@ Received: from ip68-231-191-153.tc.ph.cox.net (HELO asclepias.av8n.net) (smtp@68
/home/jsd/Maildir/cur/1342363199.24320.cloud:2,
#endif
-int skrewt::krunch_rfrom(){
- stringstream parse;
- parse.str(received_from);
- string word;
- parse >> word;
- if (word != "from") {
- cerr << progid << " bad 'Received: from' line ... '"
- << word << "'" << endl;
- return ex_syserr;
- }
- parse >> proximta_rDNS;
- parse >> word;
- if (word == "(HELO") {
- parse >> proximta_HELO;
- proximta_HELO = rtrim(proximta_HELO, "()");
- parse >> word;
- } else {
- proximta_HELO = proximta_rDNS;
- }
- size_t len = word.length();
- if (len<2 || word[0] != '(' || word[len-1] != ')') {
- cerr << progid << " bad 'Received: from' line ;;; '"
- << word << "'" << endl;
- return ex_syserr;
- }
- proximta_IP = word.substr(1, len-2);
- size_t where = proximta_IP.find("@");
- if (where != string::npos){
- proximta_AuthUser = proximta_IP.substr(0, where);
- proximta_IP = proximta_IP.substr(1+where);
- }
-
- return 0;
-}
-
-int skrewt::headers(){
- //xxxx cerr << progid << " begins" << endl;
- for (;;){ // outer loop over all records in the header
- if (cin.eof()) break;
- if (cin.bad()) return 1;
-
- string line;
-// on fail, go back to top of outer loop and check for eof versus bad
- if (getline(cin, line).fail()) continue;
- msgsize += line.length()+1;
- if (msgsize > maxsize) {
- cerr << progid << " rejection: bigger than " << maxsize << endl;
- exeunt(ex_spam);
- }
- cout << line << endl;
- bigbuf.push_back(line);
- string headrec = noCR(line); // for a folded record, this is the first line
-
- for (;;) { // inner loop to build a multi-line record e.g. folded record:
- if (cin.eof()) break;
- if (cin.bad()) return 1;
- char ch;
- if (cin.get(ch).fail()) continue;
- cin.putback(ch);
- if (ch != ' ' && ch != '\t') break;
- string line;
-// on fail, go back to top of inner loop and check for eof versus bad
- if (getline(cin, line).fail()) continue;
- msgsize += line.length()+1;
- if (msgsize > maxsize) {
- cerr << progid << " rejection: bigger than " << maxsize << endl;
- exeunt(ex_spam);
- }
- cout << line << endl;
- bigbuf.push_back(line);
- headrec += "\n" + noCR(line);
- }
-// here with a fully assembled header record
-// headrec (unlike line) contains no DOS CR characters
- int len = headrec.length();
- if (len == 0) {
- saw_blank_line = 1;
- break; // no more headers in this message
- }
-
-// here if it's a header line
- string headword;
- string rest;
- size_t where = headrec.find(":");
- if (where != string::npos) {
- headword = headrec.substr(0, where);
- rest = ltrim(headrec.substr(1+where));
- }
- headword = toLower(headword);
- if (0){
- } else if (headword == "from") {
- from = rest;
- } else if (headword == "to") {
- to = rest;
- } else if (headword == "return-path") {
- return_path = rest;
- } else if (headword == "message-id") {
- message_id = rest;
- } else if (headword == "received") {
- if (!received_from.length() && prefix("from ", rest)){
- received_from = rest;
- }
- } else if (headword == "date") {
- date = rest;
- } else if (headword == "subject") {
- subject = rest;
- } else if (headword == "content-type") {
- content_type = rest;
- } else if (headword == "delivered-to") {
- delivered_to = rest;
- }
- //xxxx cout << headrec.length() << " ... ";
- recno++;
- if (0) if (recno <= 6) cerr << progid << "#" << recno
- << " " << headrec << endl;
- }
- return 0;
-}
-
-int skrewt::interstage(){
- if (saw_blank_line) {/* ignore */}
-// Note that the headers are in reverse-chronological order:
- cerr << progid <<" Return-path: " << return_path <<endl;
-
- { // parse the 'Received: from' line:
- cerr << " Received: " << received_from <<endl;
- int rslt = krunch_rfrom();
- if (rslt) return rslt;
- cerr << " rDNS: " << proximta_rDNS << endl;
- cerr << " HELO: " << proximta_HELO << endl;
- cerr << " IP: " << proximta_IP << endl;
- cerr << " AuthUser: " << proximta_AuthUser << endl;
- cerr << " Mid '" << message_id << "'" << endl;
- }
-
- sepofra my_spf;
- try {
- my_spf.check(proximta_IP,
- proximta_HELO,
- return_path,
- "junk", 0/* verbosity */);
- cerr << "*** " << my_spf.explain() << endl;
- } catch (bad_thing foo) {
- cerr << "Caught bad thing: " << foo.what() << endl;
- return ex_syserr;
- }
-
-// The logic here is: In order:
-// 1:: If whitelisted, accept. No greylisting, no spam-checking.
-// 2:: If blacklisted, reject. No greylisting, no spam-checking.
-// 3:: If good reputation, spam-check it and send it on its way.
-// 4:: If no reputation, greylist.
-// 5:: If bad reputation, ????
-
-// Expanding item 3 to the next level of detail:
-// 3a:: If some domain vouches for this sender-IP via SPF,
-// then the reputation is bound to the domain.
-// 3c:: If some domain vouches for the message vie DKIM,
-// then the reputation is bound to the domain.
-// 3d:: If no SPF or DKIM, then the reputation attaches
-// to the sender-IP.
-
-// Expanding item 4 to the next level of detail:
-// 4a:: If the greylisting database says this message is ripe
-// spam-check it. If it's OK, use it to count toward reputation.
-// 4b:: If it is previously unseen or too old, start greylisting
-// timer from scratch. Reject with temporary error.
-// 4c:: If it is in the "green" state, let the timer
-// continue from where it is. Reject with temporary error.
-
-// Note: Reputation normally attaches to a domain.
-// With SPF, the domain vouches for the sender at a given IP address
-// ... and then the sender implicitly vouches for the message.
-// With DKIM, the domain vouches for an individual message.
-// With neither SPF nor DKIM, reputation attaches to the sender's
-// IP address. The sender vouches for the message.
-//
-// During greylisting, delay applies to the message. Reputation
-// applies to the domain (via SPF or DKIM) or to the server
-// (otherwise).
-
-
-// If you are a medium-sized operator, such that you have one
-// and only one IP address that ever sends email, and it is a
-// static IP address, then you don't have much to gain from
-// DKIM or SPF. Attaching a reputation to your domain is not
-// much different from attaching a reputation to your IP address.
-
-// In constrast, if you are a low-budget operator with a
-// dynamic IP address, you benefit from SPF and/or DKIM.
-// Your reputation attaches to your domain, and remains
-// stable even as your IP address changes.
-
-// At the other extreme, if you are a big-time operator
-// such as googlegroups.com, you benefit from DKIM and/or
-// SPF. Your IP addresses are not dynamic, but they are
-// numerous, so you prefer to have your reputation apply
-// to all your email-sending hosts.
-
-#if 0 /* typical Received-SPF line */
- Received-SPF: pass (google.com: domain of rpendarvis@brenau.edu designates 74.125.245.70 as permitted sender) client-ip=74.125.245.70;
-#endif
-
-#if 0 /* SPF users */
- :; mail-scan +received-spf /home/jsd/Maildir/cur[/]* |
- sed 's/.*domain of\(.*\).*designates.*/XXX \1 YYY/' |
- awk '/XXX/{print "<" $2 ">"}' | sort | uniq -c | sort -nr
- 81 <gmail.com>
- 17 <mac.com>
- 8 <gmx.net>
- 8 <bbruner@gmail.com>
- 7 <jsd@av8n.com>
- 6 <kst24@cam.ac.uk>
- 5 <farooq.w@gmail.com>
- 4 <scerri@chem.ucla.edu>
- 4 <comcast.net>
- 4 <c2i.net>
- 3 <gemort2006@gmail.com>
- 2 <rrhake@earthlink.net>
- 2 <hotmail.com>
- 2 <GCC.EDU>
- 1 <us.panasonic.com>
- 1 <sss.pgh.pa.us>
- 1 <scot_wherland@wsu.edu>
- 1 <rpendarvis@brenau.edu>
- 1 <hmperks@gmail.com>
- 1 <btv1==55494f7d7e0==matt.fisher@email.stvincent.edu>
- 1 <arcor.de>
-#endif
-
-#if 0 /* DKIM users */
- 52 d=googlegroups.com;
- 27 d=barackobama.com;
- 10 d=gmail.com;
- 5 d=bronto.com;
- 5 d=bluehornet.com;
- 4 d=news.abebooks.com;
- 2 d=yahoo.co.uk;
- 2 d=sbcglobal.net;
- 2 d=embarqmail.com;
- 2 d=emailms.angieslist.com;
- 1 d=newsletters.sourceforge.net;
- 1 d=members.ebay.com;
- 1 d=info.citibank.com;
- 1 d=ebay.com;
- 1 d=commail1.co.za;
-#endif
-
- list<string> badnews;
- int whitelisted(0);
-
- if (subject.find("sesame") != string::npos
- && subject.find("swordfish") != string::npos) {
- whitelisted++;
- }
-
- if (delivered_to.length()){
- cerr << progid << " Delivered-to: <<<" << delivered_to << ">>>" << endl;
- }
- if (toLower(trim(delivered_to)) == "jean@av8n.com") {
- badnews.push_back("Looping Delivered-to: " + delivered_to);
- }
-
- if (subject.find("-please-bounce-this-") != string::npos) {
- badnews.push_back("by request");
- }
-
- if (!date.length()) {
- badnews.push_back("no date");
- }
-
- if (mid_required && !message_id.length()) {
- badnews.push_back("no message-id");
- }
-
- if (badnews.size() && !whitelisted){
- cerr << progid << " " << join(", ", badnews) << endl;
- if (error_exit){
- cerr << progid << " '" << from
- << "' to '" << to
- << "'" << endl;
- exeunt(ex_spam);
- }
- }
- return 0;
-}
-
-int skrewt::body(){
- string main_contype;
- if (content_type.length())
- parse_content(content_type, main_contype, boundary);
-// some slightly-useful booleans:
- int currently_text = main_contype == "text";
- int main_multipart = main_contype == "multipart";
-
-// early-stage thinking has been done.
-// Now spew the rest of the message
- //xxxx cerr << "body begins: " << main_contype << " " << currently_text << " " << boundary << endl;
- int in_subheads(0);
- int textlines(0);
-
- for (;;){ // outer loop over all lines in the body
- if (cin.eof()) break;
- if (cin.bad()) return 1;
- string line;
-// on fail, go back to top of outer loop and check for eof versus bad
- if (getline(cin, line).fail()) continue;
- msgsize += line.length()+1;
- if (msgsize > maxsize) {
- cerr << progid << " rejection: bigger than " << maxsize << endl;
- maybe_exeunt(ex_spam, error_exit);
- }
- bigbuf.push_back(line);
- cout << line << endl;
- if (in_subheads){
- if (line == "" || line == "\r") in_subheads = 0;
- }
- if (in_subheads){
- string sub_contype;
- string junk;
-// in principle could worry about folded headers,
-// but in this application it doesn't actually matter
- string headword;
- string rest;
- size_t where = line.find(":");
- if (where != string::npos) {
- headword = line.substr(0, where);
- rest = ltrim(line.substr(1+where));
- }
- headword = toLower(headword);
- if (headword == "content-type") {
- parse_content(rest, sub_contype, junk);
- currently_text = sub_contype == "text";
- //xxxx cerr << "setting contype '" << sub_contype << "' " << currently_text << " ... " << textlines << endl;
- }
- } else {
- if (main_multipart && line == "--" + boundary) {
- //xxxx cerr << "found subhead boundary" << endl;
- in_subheads = 1;
- continue;
- }
- if (currently_text) textlines++;
- }
- }
-
- if (0) cerr << "textlines: " << textlines << endl;
- if (!textlines) {
- cerr << progid << " rejection: no text: " << error_exit << endl;
- maybe_exeunt(ex_spam, error_exit);
- }
- cerr << progid << " normal completion" << endl;
- return(ex_good);
-}
-
////////////////////////////////////////////////////////////
int main(int _argc, const char** _argv){
@@ -546,24 +85,22 @@ int main(int _argc, const char** _argv){
}
skrewt mysk;
+// cerr << "maxsize: " << mysk.maxsize << endl;
- while (argc) {
- string arg(*argv); argv++; argc--;
+ argParser ARGS(argc, argv);
+ try {while (ARGS.size()) {
+ string arg = ARGS.next();
if (arg.substr(0,2) == "--") arg = arg.substr(1);
- if (prefix(arg, "-help")) {
+ if (ARGS.prefix("-help")) {
usage(0);
}
if (0) {
- } else if (prefix(arg, "-mid-required")) {
+ } else if (ARGS.prefix("-mid-required")) {
mysk.mid_required++;
- } else if (prefix(arg, "-error-exit")) {
+ } else if (ARGS.prefix("-error-exit")) {
mysk.error_exit++;
- } else if (prefix(arg, "-maxsize")) {
- if (!argc) {
- cerr << "Option -maxsize requires an argument" << endl;
- exit(ex_usage);
- }
- mysk.maxsize = atoi(*argv); argv++; argc--;
+ } else if (ARGS.prefix("-maxsize", 1)) {
+ mysk.maxsize = atoi(ARGS.shift().c_str());
} else if (arg.substr(0,1) == "-") {
cerr << "Unrecognized option '" << arg << "'" << endl;
cerr << "For help, try: " << progname << " -help" << endl;
@@ -573,10 +110,16 @@ int main(int _argc, const char** _argv){
cerr << "For help, try: " << progname << " -help" << endl;
exit(ex_usage);
}
+ }}
+ catch (int) {
+ exit(ex_usage);
}
- int rslt = mysk.headers();
+ int rslt = mysk.headers(cin);
if (rslt) return rslt;
+ mysk.dump_bigbuf(cout);
+ mysk.headerbuf = mysk.bigbuf;
+ mysk.bigbuf = vector<string>(0);
// Headers are done.
// Do some early-stage thinking.
@@ -584,7 +127,8 @@ int main(int _argc, const char** _argv){
rslt = mysk.interstage();
if (rslt) return rslt;
- rslt = mysk.body();
- return rslt;
-
+ rslt = mysk.body(cin, cout);
+ if (rslt) return rslt;
+ mysk.dump_bigbuf(cout);
+ return 0;
}