aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn Denker <jsd@av8n.com>2021-10-17 10:10:18 -0700
committerJohn Denker <jsd@av8n.com>2021-10-17 11:09:24 -0700
commit74ddd0381aa1b1a90eb0d5300fa576cb2348eeac (patch)
tree72a9dded6f800467d52e479eb37574e6de5f2e6c
parent634d365a03cb0581a062cd3cf4db9ae69f1cde26 (diff)
basically functional, but still a work in progress
-rw-r--r--.gitignore17
-rw-r--r--arg_parser.c288
-rw-r--r--arg_parser.h95
-rw-r--r--binomial-coefficient-choose.h6
-rw-r--r--makefile32
-rw-r--r--parse_csv.c184
-rw-r--r--parse_csv.h34
-rw-r--r--poiss.c658
-rw-r--r--poissonier.h108
9 files changed, 1422 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..60f56d2
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,17 @@
+RCS
+CVS
+*.o
+*.d
+*~
+*#[0-9]*
+[#]*#
+.#*
+*.aux
+*.aux1
+*.haux
+*.htoc
+*.log
+
+###############
+
+poiss
diff --git a/arg_parser.c b/arg_parser.c
new file mode 100644
index 0000000..d4ff196
--- /dev/null
+++ b/arg_parser.c
@@ -0,0 +1,288 @@
+using namespace std;
+#include "arg_parser.h"
+#include <complex>
+#include <stdexcept>
+
+using namespace std;
+
+// Convert a string to this-or-that representation.
+// Specialization is not the same as instantiation!
+// Atox<int> and Atox<double> need to come early,
+// because they get used by later things.
+
+template<typename T> struct cvt2 {
+ static double doit(string const sss){
+ stringstream parse;
+ parse.str(sss);
+ T rslt;
+ parse >> rslt;
+ if (parse.fail()) throw invalid_argument("'" + sss + "' isn't a proper " + myTraits<T>::name);
+ if (parse.eof()) return rslt;
+ do {
+ char ch;
+ parse >> ch;
+ if (parse.fail()) break;
+ if (ch == 'k') rslt *= 1024;
+ else if (ch == 'M') rslt *= 1048576;
+ else break;
+ parse >> ws; // without this, we wouldn't know whether we were at EoF
+ if (!parse.eof()) break;
+ return rslt; // whew! good.
+ } while (0);
+ throw invalid_argument("'" + sss + "' has extraneous verbiage");
+ return 0; /* should never happen */
+ }
+};
+
+
+template<typename T> struct helper{
+ static T convert(string const &arg);
+};
+
+template<> struct helper<string> {
+ static string convert(string const &arg){
+ return arg;
+ }
+};
+
+template<> struct helper<int> {
+ static int convert(string const &arg){
+ return cvt2<int>::doit(arg.c_str());
+ }
+};
+
+template<> struct helper<double> {
+ static double convert(string const &arg){
+ if (tolower(arg) == "inf") return Inf;
+ if (tolower(arg) == "-inf") return -Inf;
+ return cvt2<double>::doit(arg.c_str());
+ }
+};
+
+// convert to double, then downgrade to float:
+template<> struct helper<float> {
+ static float convert(string const &arg){
+ return helper<double>::convert(arg);
+ }
+};
+
+template<typename B> struct helper<vector<B> > {
+ static vector<B> convert(string const &_arg){
+ string arg(_arg);
+ vector<B> rslt;
+ while (arg.length()) {
+ string::size_type where = arg.find(',');
+ string word;
+ if (where == arg.npos) {
+ word = arg;
+ arg = "";
+ } else {
+ word = arg.substr(0, where);
+ arg = arg.substr(1+where);
+ }
+ rslt.push_back(helper<B>::convert(word));
+ }
+ return rslt;
+ }
+};
+
+template<typename B>
+string num_from_list(const string arg, B& rslt){
+ if (arg.length()) {
+ string::size_type where = arg.find_first_of(", \t");
+ string word;
+ if (where == arg.npos) {
+ rslt = helper<B>::convert(arg);
+ return "";
+ } else {
+ rslt = helper<B>::convert(arg.substr(0, where));
+ return arg.substr(1+where);
+ }
+ }
+// should never get here
+ return arg;
+}
+
+template<typename B> struct helper<complex<B> > {
+ static complex<B> convert(string const &_arg){
+ string arg(_arg);
+ B rp(0), ip(0);
+ arg = num_from_list(arg, rp);
+ arg = num_from_list(arg, ip);
+ return complex<B>(rp, ip);
+ }
+};
+
+// This is the interface that ordinary users use.
+// The point of all this is that you can perform
+// "partial specification" (what I might call re-specialization)
+// on classes, but not on functions.
+template <typename T>
+T Atox(std::string const &str) {
+ // Just delegate.
+ return helper<T>::convert(str);
+}
+
+//////////////////////////////////
+// Various getters.
+
+//+++ fileGetter ....
+//ordinary constructor:
+ fileGetter:: fileGetter(const std::string fname){
+ file.open(fname.c_str());
+ }
+
+ std::string fileGetter::get(){
+ std::string str;
+ for (;;) {
+ file >> str; // try to read one word
+ if (file.fail()) return ""; // failed
+ if (str[0] != '#') return str; // got a comment;
+ string junk;
+ getline(file, junk); //throw it away
+ // go around the loop, read next line
+ }
+ }
+
+// beware that neither bad() nor fail() is the opposite of good()
+ int fileGetter::fail() {
+ return file.fail();
+ }
+
+//+++ cmdGetter ....
+//ordinary constructor:
+ cmdGetter::cmdGetter(int const _argc, char const * const * _argv)
+ : argc(_argc), argv(_argv), wasgood(argc) {}
+
+ std::string cmdGetter::get(){
+ if (argc <= 0) {
+ wasgood = 0;
+ return "";
+ }
+ wasgood = argc--;
+ return *argv++;
+ }
+
+ int cmdGetter::fail() {
+ return !wasgood;
+ }
+
+// That's all the getters for now.
+//////////////////////
+
+string tolower(const string& arg){
+ string dupe(arg);
+ for (string::iterator foo = dupe.begin();
+ foo != dupe.end(); foo++){
+ *foo = tolower(*foo);
+ }
+ return dupe;
+}
+
+string arg_parser::xcase(const string& arg){
+ if (!fold_case) return arg;
+ return tolower(arg);
+}
+
+// Constructor, for reading words from command line:
+ arg_parser::arg_parser(int const _argc, char const * const * _argv)
+ : fold_case(0)
+ {
+ std::shared_ptr<baseGetter> tmp (new cmdGetter(_argc, _argv));
+ getsome.push_front(tmp);
+ }
+
+// Constructor, for reading words from file:
+ arg_parser::arg_parser(const std::string fname)
+ : fold_case(0), keyword("(?" "?)"), narg(0)
+ {
+ push(fname);
+ }
+
+ void arg_parser::push(const std::string fname){
+ std::shared_ptr<baseGetter> tmp (new fileGetter(fname));
+ getsome.push_front(tmp);
+ }
+
+ int arg_parser::fail(){
+ if (!getsome.size()) return 1; // is this really correct?
+ return getsome.front()->fail();
+ }
+
+// see if cur arg is a prefix of patt */
+int arg_parser::prefix(const string& kwpatt){
+ if ((cur.length()==0 && kwpatt.length()!=0)) return 0; // disallow "", unless exact match
+ if (xcase(cur) != xcase(kwpatt).substr(0, cur.length())) return 0;
+ keyword = kwpatt;
+ narg = 0;
+ return 1;
+}
+
+ template<class T=std::string> T arg_parser::nextRaw(){
+ cur = "";
+ for (;;) {
+ if (!getsome.size()) {
+ break;
+ }
+ cur += getsome.front()->get();
+ if (fail()) {
+ // here if read failed; pop the context
+ getsome.pop_front();
+ // if read failed late; return what we have
+ if (cur.length()) return Atox<T>(cur);
+ // if read failed early, try again in parent context
+ continue;
+ }
+ size_t len = cur.length();
+ if (len == 0) break; // zero length, can't have a comma
+ if (cur[len-1] != ',') break;
+ // else has a comma, keep reading
+ }
+ return Atox<T>(cur);
+ }
+
+// return next item, assuming it is an argument:
+template<class T=std::string> T arg_parser::nextArg(){
+ if (0) cerr << "next called with narg: " << narg
+ <<" kw: " << keyword
+ << endl;
+ T rslt = nextRaw<T>();
+ narg++;
+ if (fail()) {
+ ostringstream buf;
+ buf << "Keyword " << keyword
+ << " requires argument #" << narg
+ << " of type " << myTraits<T>::name;
+ throw invalid_argument(buf.str());
+ }
+ return rslt;
+}
+
+template <typename T>
+arg_parser& arg_parser::operator>>(T& val){
+ val = nextArg<T>();
+ return *this;
+}
+
+// from .h file
+// template<typename T> T Atox(std::string const &foo);
+
+// specializations and explicit instantiations:
+// The first two lines of this macro help us figure
+// out the /name/ of a type.
+#define setup(T) \
+ template <> const char* myTraits<T >::name = #T; /* specialization */ \
+ template struct myTraits<T >; /* instantiation */ \
+ template arg_parser& arg_parser::operator>>(T& val); \
+ template T Atox<T>(std::string const &arg); \
+ template T arg_parser::nextRaw<T>();
+
+// Last but not least, instantiate the things we are going to need.
+// I don't know why the compiler can't do this automatically.
+setup(int);
+setup(double);
+setup(float);
+setup(string);
+setup(vector<int>);
+setup(vector<double>);
+setup(complex<double>);
diff --git a/arg_parser.h b/arg_parser.h
new file mode 100644
index 0000000..2ee69c6
--- /dev/null
+++ b/arg_parser.h
@@ -0,0 +1,95 @@
+#ifndef ARG_PARSER_H
+#define ARG_PARSER_H
+
+#include <iostream>
+#include <memory>
+#include <string>
+#include <list>
+#include <vector>
+#include <fstream>
+#include <sstream> // for basic parsing
+ // and for formatting error msgs
+#include <limits> // for ::infinity()
+
+// TODO: use "some_sstream >> some_int" instead of Atoi
+// since that allows more error checking
+
+#ifndef HAVE_INF
+double const Inf = std::numeric_limits<double>::infinity();
+#define HAVE_INF
+#endif
+
+std::string tolower(const std::string& arg);
+
+// This is the interfact that ordinary users use;
+// implementation in arg_parser.c is slightly tricky,
+// but users never see that.
+template<typename T> T Atox(std::string const &foo);
+
+// The base class:
+class baseGetter{
+public:
+ virtual std::string get()=0;
+ virtual int fail()=0;
+ virtual ~baseGetter(){
+//---- cerr << "dtor: baseGetter" << std::endl;
+ };
+ baseGetter(){
+//---- cerr << "basic ctor: baseGetter" << std::endl;
+ }
+};
+
+// A Getter that reads words from the command line:
+class cmdGetter : public baseGetter{
+public:
+ int argc;
+ char const * const * argv;
+ int wasgood;
+
+//ordinary constructor:
+ cmdGetter(int const _argc, char const * const * _argv);
+ virtual std::string get();
+ virtual int fail();
+};
+
+// A Getter that reads words from a file:
+class fileGetter : public baseGetter{
+public:
+ std::fstream file;
+
+//ordinary constructor:
+ fileGetter(const std::string fname);
+ virtual std::string get();
+ virtual int fail();
+ void dummy() {
+ if (Inf);
+ }
+};
+
+class arg_parser{
+public:
+ std::list <std::shared_ptr<baseGetter> > getsome;
+ std::string cur;
+ int fold_case;
+ std::string keyword;
+ int narg; // number of args picked up using >> after this keyword
+
+// Constructor, for reading words from command line:
+ arg_parser(int const _argc, char const * const * _argv);
+// Constructor, for reading words from file:
+ arg_parser(const std::string fname);
+ void push(const std::string fname);
+ int fail();
+ template<class T=std::string> T nextArg();
+ template<class T=std::string> T nextRaw();
+
+// see if cur arg is a prefix of patt */
+ int prefix(const std::string& patt);
+ template <typename T> arg_parser& operator>>(T& val);
+ std::string xcase(const std::string& arg);
+};
+
+template<typename T> struct myTraits {static const char* name;};
+
+#define arg_parser_h
+#endif
diff --git a/binomial-coefficient-choose.h b/binomial-coefficient-choose.h
new file mode 100644
index 0000000..cd0d91f
--- /dev/null
+++ b/binomial-coefficient-choose.h
@@ -0,0 +1,6 @@
+#ifndef BINOMIAL_COEFFICIENT_CHOOSE_H
+#define BINOMIAL_COEFFICIENT_CHOOSE_H
+
+int binomialCoeff(int const n, int const k);
+
+#endif
diff --git a/makefile b/makefile
new file mode 100644
index 0000000..45009b0
--- /dev/null
+++ b/makefile
@@ -0,0 +1,32 @@
+LDFLAGS := -lnlopt
+
+c_sources := poiss.c arg_parser.c parse_csv.c
+
+% : %.c # cancel built-in one-step rule
+
+%.o : %.c
+ g++ -g -O2 -Wall $< -c
+
+% : %.o
+ g++ $^ $(LDFLAGS) -o $@
+
+.PHONY : all
+
+## dependency-finding scheme (with local mods) based on:
+## http://www.gnu.org/manual/make-3.77/html_mono/make.html#SEC42
+## (requires an include statement at end of the makefile)
+%.d: %.c
+ @$(SHELL) -ec '$(CXX) -MM $(CXXFLAGS) $< \
+ | sed '\''s/\($*\)\.o[ :]*/\1.o $@ : /g'\'' > $@; \
+ [ -s $@ ] || rm -f $@'
+
+################ end of pattern rules, start of real rules
+
+all : poiss
+
+poiss : poiss.o arg_parser.o parse_csv.o
+ g++ $^ $(LDFLAGS) -o $@
+
+ifndef NO_DOT_D
+ include $(c_sources:.c=.d)
+endif
diff --git a/parse_csv.c b/parse_csv.c
new file mode 100644
index 0000000..12e98f3
--- /dev/null
+++ b/parse_csv.c
@@ -0,0 +1,184 @@
+#include <istream>
+#include <string>
+#include <vector>
+#include <stdexcept>
+#include "parse_csv.h"
+using namespace std;
+
+/*************
+
+Format defined:
+ https://tools.ietf.org/html/rfc4180
+See also:
+ https://en.wikipedia.org/wiki/Comma-separated_values
+
+We depart from the RFC by ignoring \r, so that lines can
+end with either \r\n (dos style) or simply \n (unix style).
+
+We check for malformed quoted fields and throw exceptions.
+
+The last line of the input .csv file may or may not
+contain a newline:
+:; echo -en "xxx\n" | ./parse_csv_demo - | od -b -Anone
+ 170 170 170 012
+:; echo -en "xxx" | ./parse_csv_demo - | od -b -Anone
+ 170 170 170 012
+
+A line containing nothing but the newline has one field
+with a zero-length datum:
+:; echo -en "\n" | ./parse_csv_demo - | od -b -Anone
+ 012
+
+A line containing no data /and/ no newline isn't a line
+at all. No line, no fields, no data:
+:; echo -en "" | ./parse_csv_demo - | wc
+ 0 0 0
+
+************/
+
+string const Q("\"");
+
+enum class CSVState {
+ Start,
+ UnquotedField,
+ QuotedField,
+ QuoteInQuote
+};
+
+// Read one row of .csv table.
+// Return a zero-length vector if we encounter EoF
+// before seeing any data.
+// Note that a table row can extend across multiple .csv file lines,
+// if there are embedded newlines.
+//
+// Within quoted fields, we accept commas, quotes, and newlines,
+// e.g "Hughie, ""Louie"", Dewey".
+template <class datum = qstring>
+vector<datum> readCSVRow(istream& input) {
+ using namespace std;
+ if (input.eof()) return vector<datum>(0);
+ CSVState state = CSVState::Start;
+ vector<datum> fields(0);
+ char c;
+ for (;;) {
+ input.read(&c, 1);
+ if (input.eof()) {
+ if (state == CSVState::QuotedField)
+ throw runtime_error("Unterminated quote in .csv file in field# "
+ + to_string(fields.size())
+ + "\n... namely <"
+ + Q + fields.back() + ">");
+ break;
+ }
+ if (!input.good()) throw runtime_error("input error in .csv file");
+
+// if we made it this far, the line contains at least one datum:
+ if (fields.size() == 0) fields.push_back(datum());
+ switch (state) {
+ case CSVState::Start:
+ switch (c) {
+ case ',': // zero-length field
+ fields.push_back(datum());
+ break;
+ case '"':
+ state = CSVState::QuotedField;
+// This is no-op if the datum is not of type qstring:
+ set_q(fields.back(), 1);
+ break;
+ case '\r':
+ break;
+ case '\n':
+ goto EndRecord;
+ default:
+ state = CSVState::UnquotedField;
+ fields.back().push_back(c);
+ break;
+ }
+ break;
+ case CSVState::UnquotedField:
+ switch (c) {
+ case ',': // start a new field:
+ state = CSVState::Start;
+ fields.push_back(datum());
+ break;
+ case '"': throw runtime_error
+ ("Stray quote in .csv file in field# "
+ + to_string(fields.size())
+ + "\n... namely <"
+ + fields.back() + Q + ">"
+ );
+ break;
+ case '\r': break;
+ case '\n': goto EndRecord;
+ default: fields.back().push_back(c);
+ break;
+ }
+ break;
+ case CSVState::QuotedField:
+ switch (c) {
+ case '"': state = CSVState::QuoteInQuote;
+ break;
+// Accept anything except quote here;
+// that includes comma, \r, and \n.
+ default: fields.back().push_back(c);
+ break;
+ }
+ break;
+// Previous state saw a quote; we consider what follows:
+ case CSVState::QuoteInQuote:
+ switch (c) {
+ case ',': // comma after closing quote
+ state = CSVState::Start;
+ fields.push_back(datum());
+ break;
+ case '"': // double ", map to plain "
+ fields.back().push_back('"');
+ state = CSVState::QuotedField;
+ break;
+ case '\r': break;
+ case '\n': goto EndRecord;
+ default: throw runtime_error
+ ("Extraneous verbiage in .csv file after quoted field# "
+ + to_string(fields.size())
+ + "\n... namely <"
+ + Q + fields.back() + Q
+ + string(1,c) +">");
+ break;
+ }
+ break;
+ }
+ }
+ EndRecord:;;;;
+ return fields;
+}
+
+// Read entire CSV file.
+// Within quoted fields, we accept commas, quotes, and newlines,
+// e.g "Hughie, ""Louie"", Dewey".
+template <class datum = qstring>
+vector<vector<datum> > readCSV(istream &in) {
+ using namespace std;
+ vector<vector<datum> > table;
+ datum row;
+ while (!in.eof()) {
+ auto fields = readCSVRow<datum>(in);
+ if (fields.size() == 0) break;
+ table.push_back(fields);
+ }
+ return table;
+}
+
+// Explicit instantiation for the <qstring> version.
+// If you want some other version, you'll have to instantiate it.
+template vector<qstring> readCSVRow<qstring>(istream& input);
+template vector<vector<qstring> >readCSV<qstring>(istream &in);
+// Apparently it's not necessary to mention get_q() here,
+// but probably more portable to do it anyway:
+////template int get_q<qstring>(qstring const&);
+
+#if 1
+// Explicit instantiation for the <string> version.
+template vector<string> readCSVRow<string>(istream& input);
+template vector<vector<string> >readCSV<string>(istream &in);
+/////template int get_q<string>(string const&);
+#endif
diff --git a/parse_csv.h b/parse_csv.h
new file mode 100644
index 0000000..362c478
--- /dev/null
+++ b/parse_csv.h
@@ -0,0 +1,34 @@
+#ifndef PARSE_CSV__H
+#define PARSE_CSV__H
+#include <vector>
+
+struct qstring : public std::string {
+ int q; // was explicitly quoted
+// Constructors:
+ qstring(): q(0) {}
+ qstring(int const _q): q(_q) {}
+ qstring(std::string const &s): std::string(s), q(0) {}
+ qstring(std::string const &s, int const _q): std::string(s), q(_q) {}
+};
+
+// Helper functions, not class member functions:
+// Simple case, do nothing:
+template<class T> inline void set_q(T& target, int const val) {}
+template<class T> inline int get_q(T const& target) {return 0;}
+
+// Specialization: Fancy case, actually act on q:
+template<> inline void set_q(qstring& target, int const val) {
+ target.q = val;
+}
+
+template<> inline int get_q(qstring const& target){
+ return target.q;
+}
+
+template <class datum = qstring>
+std::vector<datum> readCSVRow(std::istream& input);
+
+template <class datum = qstring>
+std::vector<std::vector<datum> > readCSV(std::istream &in);
+
+#endif
diff --git a/poiss.c b/poiss.c
new file mode 100644
index 0000000..70a20b9
--- /dev/null
+++ b/poiss.c
@@ -0,0 +1,658 @@
+/////////////////////////
+
+#include <iostream>
+using namespace std;
+
+void usage() {
+ cout << R"EoF(Program to fit to Poisson distributed data.
+Uses actual Poisson probability (i.e. not Gaussians, and
+therefore not least squares).
+
+Options include:
+ -h # print this message (and immediately exit)
+ -ifile $fn # input file, date for -fit
+ -gfile $fn # log information about initial guesses for -fit
+ -fit $fn # do the fit; send results to $fn
+
+# the rest are for testing and background investigations:
+ -dkcurve $fn # smooth curves and scattered data at fake abscissas
+ -entropy $fn # entropy of the Poisson distro
+ # as a function of the intensity (lambda)
+ -seed $str # seed the RNG with $str
+ -seed -- # seed the RNG from /dev/random
+
+To write to stdout, use "-" for the $fn.
+
+See ./doit for a working example, including the SS --meld stanzas.
+)EoF";
+}
+
+#include <iomanip> /* for setw */
+#include <vector>
+#include <nlopt.hpp> /* the minimum-finder */
+#include "arg_parser.h"
+#include <string>
+#include <sstream>
+#include <map>
+#include "parse_csv.h"
+#include "poissonier.h"
+
+// Used for equal weighting:
+vector<double> const u5(5, 1.0);
+
+// Class to describe a sitch, i.e. a situation, i.e.
+// everything the objective function needs to know:
+struct sitcher {
+ vector<double> norm; // keep the adjustable parameters close to unity
+ vector<double> t1;
+ vector<double> t2;
+ vector<int> obs; // actual observed counts in the interval [t1, t2]
+};
+
+// Instantaneous rate at time ttt.
+// Takes parameters two at a time: (amplitude, decay rate).
+// If there are an odd number of parameters,
+// the last one is the baseline, i.e. independent of time,
+// equivalent to a decay with the rate locked at zero.
+// Decay rates are in nats per second.
+double inst_multi_exp(vector<double> const& prm,
+ double const ttt, vector<double> const norm) {
+ double rslt(0);
+ int nn = prm.size();
+ for (int ii = 0; ii < nn; ii+=2){
+ double amp(prm[ii] * norm[ii]);
+ double rate(1+ii < nn ? prm[1+ii] * norm[1+ii] : 0.);
+ if (rate) {
+ double rate(prm[1+ii]);
+// use the negative rate:
+ rslt += amp * exp(- rate * ttt);
+ } else {
+ rslt += amp;
+ }
+ }
+ return rslt;
+}
+
+// Format all the elements of a C++ vector,
+// creating a string suitable for printing.
+template<class T>
+string dump(vector<T> const arg) {
+ stringstream rslt;
+ string sep = "";
+ for (T val : arg) {
+ rslt << sep << val;
+ sep = ", ";
+ }
+ return rslt.str();
+}
+
+// Format all the elements of a counted array,
+// creating a string suitable for printing.
+template<class T>
+string dump(int const nn, T const arg[]) {
+ stringstream rslt;
+ string sep = "";
+ for (int ii = 0; ii < nn; ii++) {
+ rslt << sep << arg[ii];
+ sep = ",";
+ }
+ return rslt.str();
+}
+
+// numerically accurate sinh(arg)/arg
+template<class T>
+inline T sinhc(T arg) {
+ if (abs(arg) < 1e-4) return 1 + arg*arg/6;
+ return sinh(arg)/arg;
+}
+
+struct evendor {
+ double t; // time
+ double n; // number of events
+};
+
+// Intensity, usually denoted lambda,
+// i.e. integral of rate(t) dt over the given interval.
+double inty_multi_exp(vector<double> const& prm,
+ double const t1, double const t2, vector<double> const norm) {
+ double rslt(0);
+ int nn = prm.size();
+ for (int ii = 0; ii < nn; ii+=2){
+ double amp(prm[ii] * norm[ii]);
+ double rate(1+ii < nn ? prm[1+ii] * norm[1+ii] : 0.);
+ double mid((t2 + t1) / 2.);
+ double tau(t2 - t1);
+ rslt += amp * exp(-rate*mid) * tau * sinhc(rate*tau/2.);
+ }
+ return rslt;
+}
+
+// the actual objective function
+// returns total log probability, summed over all data points
+//
+// _sitch is unchanged,
+// but cannot be declared const, because bobyqa wouldn't like that.
+double limp(unsigned int const nprm, double const* _prm,
+ double * _grad, void * _sitch) {
+ sitcher& sitch(*(sitcher*)(_sitch));
+ if (_grad != 0) throw invalid_argument("grad");
+ if (sitch.norm.size() != nprm) throw invalid_argument("norm");
+ vector<double> prm(_prm, _prm+nprm);
+ if (prm.size() != nprm) throw logic_error("vector ctor");
+ double rslt(0);
+ int npts(sitch.t1.size());
+ for (int ii = 0; ii < npts; ii++){
+ double inty = inty_multi_exp(prm, sitch.t1[ii], sitch.t2[ii], sitch.norm);
+ rslt -= poissonier::lpmd(inty, sitch.obs[ii]);
+ }
+ return rslt;
+}
+
+// Scan the data array,
+// looking for records that look like headers
+// (as opposed to numerical data).
+int count_header(vector<vector<string> > const aoa){
+ int NR = aoa.size();
+ int hh = 0;
+ for (; hh < NR; hh++) {
+ if (aoa[hh].size() == 0) continue;
+ string word = aoa[hh][0];
+ size_t where;
+ where = word.find_first_not_of(" ");
+ if (where == string::npos) continue;
+ word = word.substr(where);
+ try {
+ stod(word, &where); // don't care about return value
+ }
+ catch (exception& ee) {
+ continue;
+ }
+ word = word.substr(where);
+ where = word.find_first_not_of(" ");
+ if (where == string::npos) break;
+ }
+ return hh;
+}
+
+// Some tricky heuristics.
+// Guess a starting point for the fitting process
+// (i.e. minimization process).
+// A good guess speeds things up and greatly increases
+// the chance of finding the global minimum (as opposed
+// to getting stuck in a worthless local minimum, or
+// diverging to arrant nonsense).
+struct guesser {
+ int hh;
+ int npts;
+ vector<evendor> evt;
+ vector<double> model;
+ sitcher sitch;
+
+ guesser() : hh(0), npts(0) {}
+
+ void setup(vector<vector<string> > const aoa) {
+ int NR = aoa.size(); // includes header row
+ if (NR == 0) throw invalid_argument("Zero-length input file");
+ hh = count_header(aoa);
+ npts = NR - hh;
+ if (0) cout << "NR: " << NR
+ << " npts: " << npts
+ << endl;
+ evt = vector<evendor>(npts);
+ double minute(60);
+ for (int ii = 0; ii < npts; ii++) {
+ // file times are in minutes; convert to SI units here:
+ evt[ii].t = stod(aoa[hh+ii][0]) * minute;
+ evt[ii].n = stod(aoa[hh+ii][1]);
+ }
+
+ double max_t = evt[npts-1].t;
+ double max_n = evt[npts-1].n;
+
+ // starting point of the baseline estimate,
+ // as a fraction of the total span of the data,
+ // assuming data starts at zero:
+ double tail_frac = 0.9;
+ int tail_i = -1;
+
+ // half life (in seconds) of the fast component:
+ double fast_h = 307.2;
+ double fast_a = M_LN2 / fast_h;
+ int fast_i = -1;
+
+ // half life (in seconds) of the slow component:
+ double slow_h = 45720;
+ double slow_a = M_LN2 / slow_h;
+
+ double dead(5); // 2**-5 = 3%
+
+ for (int ii = 0; ii < npts; ii++) {
+ double time = evt[ii].t;
+ if (time < tail_frac * max_t) tail_i = ii;
+ if (time < fast_h * dead) fast_i = ii;
+ }
+
+ if (0) cout << "npts: " << npts
+ << " : " << max_t
+ << "," << max_n
+ << endl;
+
+ double tail_t = evt[tail_i].t;
+ double tail_n = evt[tail_i].n;
+ double tail_r = (max_n - tail_n) / (max_t - tail_t);
+
+ if (0) cout << "tail_i: " << tail_i
+ << " : " << tail_t
+ << "," << tail_n
+ << " " << tail_r
+ << endl;
+
+ double fast_t = evt[fast_i].t;
+ double fast_n = evt[fast_i].n;
+ if (0) cout << "fast_i: " << fast_i
+ << " : " << fast_t
+ << "," << fast_n
+ // << " " << fast_r
+ << endl;
+
+ if (tail_i < 0) throw invalid_argument("wtf?");
+ if (fast_i < 0) throw invalid_argument("wtf?");
+
+ // beginning times:
+ double slow_tb = evt[fast_i].t;
+ double slow_nb = evt[fast_i].n;
+
+ double slow_dt = max_t - slow_tb;
+ double slow_dn = max_n - slow_nb;
+ double slow_mag = slow_dn - slow_dt * tail_r;
+ slow_mag /= (1 - exp(-slow_a * slow_dt) * (1 + slow_dt * slow_a));
+ // valid at time slow_tb:
+ double slow_rx = slow_mag * slow_a;
+ // valid at time zero:
+ double slow_r = slow_rx * exp(slow_a * slow_tb);
+
+ double slow_rem = slow_r * exp(-slow_a * max_t);
+ double bl_r = tail_r - slow_rem;
+
+ if (0) cout << "slow_r: " << slow_r
+ << " slow_rx: " << slow_rx
+ << " slow_mag: " << slow_mag
+ << " slow_rem: " << slow_rem
+ << " tail_r: " << tail_r
+ << " bl_r: " << bl_r
+ << endl;
+
+ vector<double> slow_model{slow_r, slow_a, bl_r};
+
+ double fast_t0 = evt[0].t;
+ double fast_n0 = evt[0].n;
+ double fast_t1 = evt[fast_i].t;
+ double fast_n1 = evt[fast_i].n;
+ double model_inty = inty_multi_exp(slow_model, fast_t0, fast_t1, u5);
+ double fast_tot = fast_n1 - fast_n0;
+ double fast_dn = fast_tot - model_inty;
+ double fast_r = fast_dn * fast_a;
+ if (0) cout << "fast_r: " << fast_r
+ << " fast_dn: " << fast_dn
+ << " fast_tot: " << fast_tot
+ << " model_inty: " << model_inty
+ << endl;
+
+ model = vector<double>{fast_r, fast_a};
+ model.insert(model.end(), slow_model.begin(), slow_model.end());
+ }
+
+// Reformat the data, to make it more useful to
+// the objective function:
+ void more() {
+ for (int ii = 1; ii < npts; ii++) {
+ sitch.t1.push_back(evt[ii-1].t);
+ sitch.t2.push_back(evt[ii ].t);
+ sitch.obs.push_back(evt[ii].n - evt[ii-1].n);
+ }
+ }
+
+// Show the guessed parameters.
+// Also do a sweep of one variable, as a qualitative sanity check.
+ void show(string const ofile) {
+ if (ofile == "") return;
+ ofstream xxout;
+ if (ofile != "-") {
+ xxout.open(ofile);
+ }
+ ostream& xout(ofile != "-" ? xxout : cout);
+
+ size_t ss = sitch.t1.size();
+ xout << ss << endl;
+ xout << sitch.t1[ss-1]
+ << " " << sitch.t2[ss-1]
+ << " " << sitch.obs[ss-1]
+ << endl;
+
+ vector<double> prm(u5);
+
+ xout << dump(model) << endl;
+
+ xout << "bl/norm,bl,limp" << endl;
+ double bl0 = prm[4];
+ void* context = (void*)(&sitch);
+ int nprm(prm.size());
+
+ for (double bl = bl0*.9; bl <= bl0*1.1; bl += bl/100.) {
+ prm[4] = bl;
+ double limpy = limp(nprm, prm.data(), 0, context);
+ xout << bl
+ << "," << bl*sitch.norm[4]
+ << "," << limpy << endl;
+ }
+ }
+};
+
+// Decode result codes returned by nlopt functions.
+// This is documented to be part of the nlopt library
+// but is absent from the version I have.
+const char *nlopt_result_to_string(nlopt_result result)
+{
+ switch(result)
+ {
+ case NLOPT_FAILURE: return "FAILURE";
+ case NLOPT_INVALID_ARGS: return "INVALID_ARGS";
+ case NLOPT_OUT_OF_MEMORY: return "OUT_OF_MEMORY";
+ case NLOPT_ROUNDOFF_LIMITED: return "ROUNDOFF_LIMITED";
+ case NLOPT_FORCED_STOP: return "FORCED_STOP";
+ case NLOPT_SUCCESS: return "SUCCESS";
+ case NLOPT_STOPVAL_REACHED: return "STOPVAL_REACHED";
+ case NLOPT_FTOL_REACHED: return "FTOL_REACHED";
+ case NLOPT_XTOL_REACHED: return "XTOL_REACHED";
+ case NLOPT_MAXEVAL_REACHED: return "MAXEVAL_REACHED";
+ case NLOPT_MAXTIME_REACHED: return "MAXTIME_REACHED";
+///??????? case NLOPT_NUM_RESULTS: return NULL;
+ }
+ return NULL;
+}
+
+// Add something to a particular component of a vector.
+template<class T>
+vector<T> goose(vector<T> const& vvv, int const ii, T const delta) {
+ vector<T> rslt(vvv);
+ rslt[ii] += delta;
+ return rslt;
+}
+
+void do_fit(poissonier& poi, string const ifile,
+ string const ofile, string const gfile) {
+ if (ifile == "") {
+ if (ofile != "") throw invalid_argument("do_fit needs an input file");
+ return;
+ }
+
+ ifstream inx;
+ if (ifile != "-") {
+ inx.open(ifile);
+ if (! inx.good()) {
+ cerr << "Cannot open input '" << ifile << "'" << endl;
+ exit(1);
+ }
+ }
+
+ istream& in(ifile == "-" ? cin : inx);
+ vector<vector<string> > aoa;
+ aoa = readCSV<string>(in);
+ guesser ggg;
+ ggg.setup(aoa);
+ ggg.more();
+ ggg.sitch.norm = ggg.model;
+ ggg.show(gfile);
+
+ vector<double> prm = u5;
+ int nprm(prm.size());
+ vector<double> const lower(nprm, 0.5);
+ vector<double> const upper(nprm, 1.5);
+
+ nlopt_result rslt = NLOPT_FORCED_STOP; // avoid "unused" warning
+ double limp_end(-9e99);
+ vector<double> found(prm); // will get modified in place
+ int OK(0);
+ try {
+ void* context = (void*)(&ggg.sitch);
+ nlopt_opt oppy = nlopt_create(NLOPT_LN_BOBYQA, nprm);
+ rslt = nlopt_set_lower_bounds1(oppy, 0.5);
+ rslt = nlopt_set_upper_bounds1(oppy, 1.5);
+ rslt = nlopt_set_min_objective(oppy, limp, context);
+ rslt = nlopt_set_xtol_rel(oppy, 1e-7);
+ rslt = nlopt_optimize(oppy, found.data(), &limp_end);
+ OK = 1;
+ }
+ catch (exception& eee) {
+ cout << "Fitting bombed out: " << eee.what() << endl;
+ }
+
+ if (OK && ofile != "") {
+ ofstream xxout;
+ if (ofile != "-") {
+ xxout.open(ofile);
+ }
+ ostream& xout(ofile != "-" ? xxout : cout);
+
+ double DoF(ggg.npts - nprm);
+ xout << ifile << endl;
+ cout << "Fit returns:, " << rslt
+ << ", i.e., " << nlopt_result_to_string(rslt)
+ << ", limp:, " << limp_end
+ << ", perdof:, " << limp_end / (ggg.npts - nprm)
+ << endl;
+ xout << "Fit returns:, " << rslt
+ << ", i.e., " << nlopt_result_to_string(rslt)
+ << ", limp:, " << limp_end
+ << ", perdof:, " << limp_end / DoF
+ << endl;
+ xout << endl;
+ xout << ",fast.amp, fast.dk, slow.amp, slow.dk, bl" << endl;
+ xout << "Normed:, " << dump(found) << endl;
+ vector<double> combi(nprm);
+ for (int ii = 0; ii < nprm; ii++) {
+ combi[ii] = found[ii] * ggg.sitch.norm[ii];
+ }
+ xout << "SI:, " << dump(combi) << endl;
+ vector<double> flip(combi);
+ flip[1] = M_LN2/combi[1];
+ flip[3] = M_LN2/combi[3];
+ xout << "½life:, " << dump(flip) << endl;
+
+// Calculate the uncertainties.
+// In particular, calculate the Mahalanobis metric
+// i.e. the second derivative (Hessian) of the log improbability.
+ vector<vector<double> > covar(nprm, vector<double>(nprm));
+ vector<double> probe(u5);
+ sitcher sitch(ggg.sitch);
+ sitch.norm = combi;
+ void* context = (void*)(&sitch);
+ double delta(0.01);
+ xout << endl;
+ for (int ii = 0; ii < nprm; ii++) {
+ string sep = "";
+ for (int jj = 0; jj < nprm; jj++) {
+ double maha;
+ if (ii != jj) {
+ maha = limp(nprm, goose(goose(
+ probe, ii, delta), jj, delta).data(), 0, context)
+ + limp(nprm, goose(goose(
+ probe, ii, -delta), jj, -delta).data(), 0, context)
+ - limp(nprm, goose(goose(
+ probe, ii, delta), jj, -delta).data(), 0, context)
+ - limp(nprm, goose(goose(
+ probe, ii, -delta), jj, delta).data(), 0, context);
+ maha = maha/4./delta/delta;
+ } else {
+ maha = limp(nprm, goose(probe, ii, delta).data(), 0, context)
+ + limp(nprm, goose(probe, ii, -delta).data(), 0, context)
+ -2.0*limp(nprm, probe.data(), 0, context);
+ maha = maha/delta/delta;
+ }
+ xout << sep << setprecision(3) << setw(11) << fixed << maha;
+ sep = ", ";
+ }
+ xout << endl;
+ }
+ }
+}
+
+// Crude reconnaissance.
+// Calculate the decay curves using made-up parameters.
+// Not in the critical path.
+void do_dk(poissonier& poi, string const ofile) {
+ if (ofile == "") return;
+ ofstream xxout;
+ if (ofile != "-") {
+ xxout.open(ofile);
+ }
+ ostream& xout(ofile != "-" ? xxout : cout);
+ sitcher sitch; // selected abscissas
+ sitcher smooth; // all abscissas
+
+ double step(1);
+ vector<double> prm( {30, .1, 10, .01, 1} );
+
+ int dt(5);
+ for (int ii = 0; ii < 500; ii+= dt) {
+ double tt1(step*ii);
+ double tt2(tt1 + dt);
+ double intensity(inty_multi_exp(prm, tt1, tt2, u5));
+ int obs = poi.sample(intensity);
+ sitch.t1.push_back(tt1);
+ sitch.t2.push_back(tt2);
+ sitch.obs.push_back(obs);
+ }
+
+ for (int ii = 0; ii < 500; ii++) {
+ smooth.t1.push_back(step*ii);
+ }
+
+// here with both sitchers fully constructed.
+
+ xout << "Seed:," << poi.graine << endl;
+// output both the smooth curves
+// and the observations (to the extent they exist)
+ for (int ii = 0; ii < 500; ii++) {
+ double tt1(smooth.t1[ii]);
+ double sminty(inst_multi_exp(prm, tt1, u5));
+ xout << tt1 << "," << sminty;
+ if (ii < (int) sitch.t1.size()) {
+ double fake(max(0.+sitch.obs[ii], 0.2)); // to facilitate log axes
+ double tt1(sitch.t1[ii]);
+ double tt2(sitch.t2[ii]);
+ double minty(inty_multi_exp(prm, tt1, tt2, u5));
+ xout << "," << tt1
+ << "," << tt2
+ << ",x"
+ << "," << minty
+ << "," << minty
+ << ",x"
+ << "," << fake
+ << "," << fake
+ << ",x";
+ }
+ xout << endl;
+ }
+}
+
+// Class used by do_entropy.
+// Use a class rather than a plain function,
+// because it returns multiple results.
+struct entroper {
+ double ptot, stot, ii;
+ void go(double const rw_obs, double const rw_mod) {
+ ptot = 0;
+ stot = 0;
+ int ii;
+ int lim = int(ceil(rw_obs));
+ lim *= 10;
+ for (ii = 0; ii < lim; ii++) {
+ if (ptot > .999999) break;
+ double pobs = poissonier::pmd(rw_obs, ii);
+ if (pobs != 0) {
+ double lpmod = poissonier::lpmd(rw_mod, ii);
+ ptot += pobs;
+ stot -= pobs * lpmod;
+ } else {
+ // don't do a calculation that could
+ // possibly multiply zero by -infinity.
+ }
+ }
+ }
+};
+
+// Out of curiosity, calculate the entropy of the Poisson distribution
+// as a function of intensity (lambda).
+// Not in the critical path.
+void do_entropy(string const ofile) {
+ if (ofile == "") return;
+ ofstream xxout;
+ if (ofile != "-") {
+ xxout.open(ofile);
+ }
+ ostream& xout(ofile != "-" ? xxout : cout);
+
+ double Delta(1.01);
+ xout << "Delta," << Delta << endl;
+ xout << "rw, ii, ptot, Scat, Sdog, Semu" << endl;
+ double ratio(pow(10., .005));
+ entroper cat, dog, emu;
+ for (double rw = 0.01; rw < 1500; rw *= ratio) {
+ cat.go(rw, rw);
+ dog.go(rw, rw/Delta);
+ emu.go(rw, rw*Delta);
+ xout << rw
+ << "," << cat.ptot
+ << "," << cat.ii
+ << "," << cat.stot
+ << "," << dog.stot
+ << "," << emu.stot
+ << endl;
+ }
+}
+
+int main(int argc, char * const * argv) {
+ poissonier poi;
+ string seed;
+ string ifile;
+ arg_parser arg(argc, argv);
+ arg.fold_case = 1;
+ string progname = arg.nextRaw();
+ string dkfile, entfile, fitfile, gfile;
+
+ for (;;){
+ string raw_arg = arg.nextRaw(); // get next keyword
+ if (arg.fail()) break;
+ int where = arg.cur.length();
+ if (where){
+ if (arg.cur[where-1] == ':') arg.cur = arg.cur.substr(0,where-1);
+ }
+ if (0) {}
+ else if (arg.prefix("help") || arg.prefix("-h")) {
+ usage();
+ exit(0);
+ }
+ else if (arg.prefix("-ifile")) arg >> ifile;
+ else if (arg.prefix("-seed")) arg >> seed;
+ else if (arg.prefix("-entropy")) arg >> entfile;
+ else if (arg.prefix("-dkcurve")) arg >> dkfile;
+ else if (arg.prefix("-gfile")) arg >> gfile;
+ else if (arg.prefix("-fit")) arg >> fitfile;
+ else {
+ cerr << "Unrecognized command '" << raw_arg << "'" << endl;
+ exit(1);
+ }
+ }
+ if (seed == "--") seed = poi.randomize();
+ else if (seed != "") poi.seed(seed);
+
+ if (!( fitfile.length()
+ || dkfile.length()
+ || entfile.length()
+ ))
+ dkfile = "-";
+ do_dk(poi, dkfile);
+ do_fit(poi, ifile, fitfile, gfile);
+ do_entropy(entfile);
+}
diff --git a/poissonier.h b/poissonier.h
new file mode 100644
index 0000000..95da15e
--- /dev/null
+++ b/poissonier.h
@@ -0,0 +1,108 @@
+#ifndef POISSONIER_H
+#define POISSONIER_H
+
+#include <random> /* for seed_seq */
+#include <iostream>
+#include <time.h> /* for clock_gettime */
+#include <vector>
+#ifdef BOOST_CPD
+# include <boost/math/distributions/poisson.hpp>
+#endif
+
+static char const printable[64+1] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "abcdefghijklmnopqrstuvwxyz"
+ "0123456789._";
+
+std::string base64(unsigned long int arg) {
+ std::string str;
+ for (;;) {
+ str = printable[arg & 63] + str;
+ arg >>= 6;
+ if (! arg) break;
+ }
+ return str;
+}
+
+std::string randomstr(unsigned int const nn=15) {
+ using namespace std;
+// FIXME: ??? Could maybe use std::bitset here:
+ random_device rd;
+ string str;
+ while (str.length() < nn) {
+ unsigned int foo(rd());
+ int nch = (sizeof(foo) * 8) / 6;
+ for (int ii = 0; ii < nch; ii++) {
+ str += printable[foo & 63];
+ foo >>= 6;
+ }
+ }
+ return str;
+}
+
+class poissonier {
+public:
+ std::mt19937 basegen;
+ std::string graine; // last seed fed to basegen
+
+// constructor
+ poissonier () {
+ timespec time;
+ clock_gettime(CLOCK_REALTIME, &time);
+ graine = base64(time.tv_sec);
+ graine += base64(time.tv_nsec);
+ seed(graine);
+ }
+ double sample(double const lambda) {
+// There is no way to change the intensity after the
+// distro is constructed, so construct a new one each time:
+ return std::poisson_distribution<>(lambda)(basegen);
+ }
+ void seed(std::string const seed) {
+// Need a temporary variable here, because the seed() function
+// modifies it:
+ graine = seed;
+ std::seed_seq seed1(seed.begin(), seed.end());
+ basegen.seed(seed1);
+ }
+ std::string randomize() {
+ graine = randomstr(15);
+ seed(graine);
+ return graine;
+ }
+ void check(double const rate) {
+ using namespace std;
+ cout << "Checking: " << rate
+ << " gives: " << sample(rate)
+ << endl;
+ }
+
+// Note this is stateless. Static member function.
+//
+// Log of the probability mass distribution:
+ static double lpmd(double const lambda, int const k){
+ // https://en.wikipedia.org/wiki/Poisson_distribution#Definition
+ if (lambda < 0) throw std::invalid_argument("negative Poisson intensity");
+ if (lambda == 0) {
+ if (k == 0) return 0; // 100% probability
+ return -INFINITY; // 0% probability
+ }
+ return k * log(lambda) - lambda - lgamma(k + 1.0);
+ }
+
+// pmd = probability mass distribution
+ inline static double pmd(double const lambda, int const k){
+ return exp(lpmd(lambda, k));
+ }
+
+#ifdef BOOST_CPD
+// not implemented
+// not needed, and would require boost
+// cpd = cumulative probability distribution
+ static double cpd(double const lambda, int const k){
+ boost::math::poisson_distribution<> pd(lambda);
+ return boost::math::cpd(pd, k);
+ }
+#endif
+};
+
+#endif /* POISSONIER_H */