summary | refs | log | tree | commit | diff
path: root/tools/skrewt.c
diff options
context:
space:
mode:
author: John Denker <jsd@av8n.com> 2012-07-24 01:01:34 (GMT)
committer: John Denker <jsd@av8n.com> 2012-07-29 22:32:36 (GMT)
commit: 9b5fbc11bb1d96dd598ebfad5539660f75571835 (patch)
tree: 581e729da93502a0ed22b1ac6700acf052232714 /tools/skrewt.c
parent: 3993d2f92fc1d357ee668d42cbb44aa3744e6d2c (diff)
progress toward cleaning up skrewt
Diffstat (limited to 'tools/skrewt.c')
-rw-r--r-- tools/skrewt.c 265
1 files changed, 163 insertions, 102 deletions
diff --git a/tools/skrewt.c b/tools/skrewt.c
index 6de3dd9..ed0e627 100644
--- a/tools/skrewt.c
+++ b/tools/skrewt.c
@@ -12,6 +12,7 @@
#include <stdio.h> /* perror */
#include <sstream>
+#include <vector>
using namespace std;
@@ -144,6 +145,58 @@ string basename(const string path){
string progname, progid;
int mypid;
+
+/* Extract the main media type (the text before the "/") from a
+ * Content-Type specification.
+ *
+ * type_spec_line: the type specification text. The caller in main() passes
+ *     the header value with the field name and colon already removed.
+ * maintype: output. Receives the text before the first "/" of the leading
+ *     token (after toLower), or "" when that token contains no "/".
+ * boundary: output parameter that the live code below never assigns; the
+ *     boundary-extraction code that follows this function is compiled out
+ *     with #ifdef, so the caller keeps whatever value it passed in.
+ *
+ * Example header lines this is meant to handle:
+ *   Content-Type: text/plain; charset="us-ascii" */
+/*   Content-Type: multipart/mixed; boundary="1170861315-1262462055-1341954763=:92165" */
+void parse_content(const string type_spec_line, string &maintype, string &boundary) {
+ string mainline(type_spec_line); // local working copy of the input
+
+ string get_type(toLower(mainline)); // presumably lower-cased so the type compares case-insensitively -- confirm toLower
+ size_t where = get_type.find_first_of(" \t;\n"); // first blank, tab, semicolon or newline ends the type token
+ if (where == string::npos) {
+ // no delimiter found: keep the whole string as the type token
+ }
+ else {
+ get_type = get_type.substr(0,where); // drop everything from the delimiter on (e.g. charset= or boundary= parameters)
+ }
+ where = get_type.find("/"); // separator between main type and subtype
+ if (where == string::npos){
+ maintype = ""; // no slash: report an empty main type
+ } else {
+ maintype = get_type.substr(0, where); // e.g. "text" from "text/plain"
+ }
+}
+
+#ifdef xxxxxxxxxxxxxxxxxxxx
+ if
+ if (0) cerr << "type (" << get_type << ") "
+ << (text_type?"text":"nope") << endl;
+
+
+ string srch = "boundary=";
+ where = headrec.find(srch);
+ if (where != string::npos) {
+ where += srch.length();
+ boundary = headrec.substr(where);
+ if (boundary[0] == '"') {
+ boundary = boundary.substr(1);
+ where = boundary.find_first_of("\"");
+ } else {
+ where = boundary.find_first_of(" \t;\n");
+ }
+ if (where == string::npos) {
+ /* do nothing, boundary=boundary as a whole */
+ } else {
+ boundary = boundary.substr(0, where);
+ }
+ }
+ }
+#endif
+
+
+
+
+
////////////////////////////////////////////////////////////
int main(int _argc, const char** _argv){
//// pid_t pid = getpid();
@@ -159,8 +212,7 @@ int main(int _argc, const char** _argv){
progid = binder.str();
}
-
- int maxsize(1000000);
+ int maxsize(1000*1000);
while (argc) {
string arg(*argv); argv++; argc--;
@@ -186,121 +238,130 @@ int main(int _argc, const char** _argv){
}
}
- int inheads(1);
+ int saw_blank_line(0);
string boundary("x-xx-x");
- int text_type(1);
- int textlines(0);
- int gotdate(0);
+ string date;
+ string subject;
+ string content_type;
+ string message_id;
int msgsize(0);
- for (;;){
+ vector<string> bigbuf;
+ cerr << "hi there" << endl;
+
+ for (;;){ // outer loop over all records in the header
if (cin.eof()) break;
if (cin.bad()) return 1;
- if (inheads) {
- string header;
- if (getline(cin, header).fail()) continue;
- msgsize += header.length()+1;
+
+ string headrec;
+// on fail, go back to top of outer loop and check for eof versus bad
+ if (getline(cin, headrec).fail()) continue;
+ msgsize += headrec.length()+1;
+ if (msgsize > maxsize) {
+ cerr << progid << " rejection: bigger than " << maxsize << endl;
+ exeunt(ex_spam);
+ }
+ cout << headrec << endl;
+ bigbuf.push_back(headrec); // for a folded record, this is the first line
+
+ for (;;) { // inner loop to build a multi-line record e.g. folded record:
+ if (cin.eof()) break;
+ if (cin.bad()) return 1;
+ char ch;
+ if (cin.get(ch).fail()) continue;
+ cin.putback(ch);
+ if (ch != ' ' && ch != '\t') break;
+ string line;
+// on fail, go back to top of inner loop and check for eof versus bad
+ if (getline(cin, line).fail()) continue;
+ msgsize += line.length()+1;
if (msgsize > maxsize) {
cerr << progid << " rejection: bigger than " << maxsize << endl;
exeunt(ex_spam);
}
- for (;;) {
- if (cin.eof()) break;
- if (cin.bad()) return 1;
- char ch;
- if (cin.get(ch).fail()) continue;
- cin.putback(ch);
- if (ch != ' ' && ch != '\t') break;
- string line;
- if (getline(cin, line).fail()) continue;
- msgsize += line.length()+1;
- if (msgsize > maxsize) {
- cerr << progid << " rejection: bigger than " << maxsize << endl;
- exeunt(ex_spam);
- }
- header += "\n" + line;
- }
- int len = header.length();
- if (len && header[len-1] == '\r') len--; // reduced length, not counting <cr>
- if (len == 0) {
- if (!gotdate) {
- cerr << progid << " rejection: no date" << endl;
- exeunt(ex_spam); // disallow mail with no date
- }
- inheads = 0;
- //cerr << "end of headers" << endl;
+ cout << line << endl;
+ bigbuf.push_back(line);
+ string cooked(line);
+ if (cooked.length()){
+ string::iterator ptr = cooked.end()-1;
+ if (*ptr == '\r') cooked.erase(ptr);
}
- else {
+ headrec += "\n" + cooked;
+ }
+// here with a fully assembled header record
+ int len = headrec.length();
+ if (len && headrec[len-1] == '\r') len--; // reduced length, not counting <cr>
+ if (len == 0) {
+ saw_blank_line = 1;
+ break; // no more headers in this message
+ }
+
// here if it's a header line
- string headword;
- string rest;
- size_t where = header.find(":");
- if (where != string::npos) {
- headword = header.substr(0, where);
- rest = ltrim(header.substr(1+where));
- }
- headword = toLower(headword);
- if (headword == "content-type") {
- string the_type = rest;
- size_t where = the_type.find_first_of(" \t;\n");
- if (where == string::npos) {
- /* do nothing */
- }
- else {
- the_type = the_type.substr(0,where);
- }
- the_type = toLower(the_type);
- text_type = (the_type.find("text/") == 0);
- if (0) cerr << "type (" << the_type << ") "
- << (text_type?"text":"nope") << endl;
- string srch = "boundary=";
- where = header.find(srch);
- if (where != string::npos) {
- where += srch.length();
- boundary = header.substr(where);
- if (boundary[0] == '"') {
- boundary = boundary.substr(1);
- where = boundary.find_first_of("\"");
- } else {
- where = boundary.find_first_of(" \t;\n");
- }
- if (where == string::npos) {
- /* do nothing, boundary=boundary as a whole */
- } else {
- boundary = boundary.substr(0, where);
- }
- }
- } else if (headword == "date") {
- gotdate++;
- } else if (headword == "subject") {
- if (rest.find("-please-bounce-this-") != string::npos) {
- cerr << progid << " rejection: by request" << endl;
- exeunt(ex_spam);
- }
- }
- }
- //xxxx cout << header.length() << " ... ";
- cout << header << endl;
+ string headword;
+ string rest;
+ size_t where = headrec.find(":");
+ if (where != string::npos) {
+ headword = headrec.substr(0, where);
+ rest = ltrim(headrec.substr(1+where));
+ }
+ headword = toLower(headword);
+ if (0){
+ } else if (headword == "date") {
+ date = rest;
+ } else if (headword == "subject") {
+ subject = rest;
+ } else if (headword == "content-type") {
+ content_type = rest;
+ }
+ //xxxx cout << headrec.length() << " ... ";
+ }
+ cerr << "headers are done. Delimited: " << saw_blank_line << endl;
+
+// Headers are done.
+// Do some early-stage thinking.
+
+ if (subject.find("-please-bounce-this-") != string::npos) {
+ cerr << progid << " rejection: by request" << endl;
+ exeunt(ex_spam);
+ }
+
+ if (!date.length()) {
+ cerr << progid << " rejection: no date" << endl;
+ exeunt(ex_spam); // disallow mail with no date
+ }
+
+ string contype;
+ int textlines(0);
+ parse_content(content_type, contype, boundary);
+ int is_text = contype == "text";
+
+// early-stage thinking has been done.
+// Now spew the rest of the message
+ cerr << "body begins: " << contype << " " << is_text << endl;
+ int inheads(0);
+ for (;;){ // outer loop over all lines in the body
+ if (cin.eof()) break;
+ if (cin.bad()) return 1;
+ string line;
+// on fail, go back to top of outer loop and check for eof versus bad
+ if (getline(cin, line).fail()) continue;
+ msgsize += line.length()+1;
+ if (msgsize > maxsize) {
+ cerr << progid << " rejection: bigger than " << maxsize << endl;
+ exeunt(ex_spam);
+ }
+ bigbuf.push_back(line);
+ cout << line << endl;
+ if (line == "--" + boundary) {
+ inheads = 1;
} else {
- string line;
- if (!getline(cin, line).fail()) {
- msgsize += line.length()+1;
- if (msgsize > maxsize) {
- cerr << progid << " rejection: bigger than " << maxsize << endl;
- exeunt(ex_spam);
- }
- if (line == "--" + boundary) {
- inheads = 1;
- } else {
- if (text_type) {
- if (ltrim(line).length()) textlines++;
- }
- }
- cout << line << endl;
+ if (is_text) {
+ if (ltrim(line).length()) textlines++;
}
}
}
+
if (0) cerr << "textlines: " << textlines << endl;
- if (!textlines) {
+ if (000 && !textlines) {
cerr << progid << " rejection: no text" << endl;
exeunt(ex_spam);
}