Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added doc/feature_gain.doc
Binary file not shown.
7 changes: 4 additions & 3 deletions src/cpp/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ CXX = g++
CXXFLAGS = -Wall -Wextra -Wconversion -pedantic -fopenmp -O2
#CXXFLAGS = -Wall -Wextra -Wconversion -pedantic -fopenmp -O2 -DUSE_OPENMP

all: libgbdt.a $(tests) ctr gbdt_predict
all: libgbdt.a $(tests) ctr gbdt_predict gbdt_casetool

data.o: $(header_files) data.cpp
$(CXX) -c $(CXXFLAGS) data.cpp
Expand Down Expand Up @@ -38,7 +38,8 @@ ctr: libgbdt.a ctr.cpp

gbdt_predict: libgbdt.a gbdt_predict.cpp
$(CXX) $(CXXFLAGS) -o gbdt_predict gbdt_predict.cpp libgbdt.a

gbdt_casetool: libgbdt.a gbdt_casetool.cpp
$(CXX) $(CXXFLAGS) -o gbdt_casetool gbdt_casetool.cpp libgbdt.a

data_unittest: libgbdt.a data_unittest.cpp
$(CXX) $(CXXFLAGS) -o data_unittest data_unittest.cpp libgbdt.a
Expand All @@ -50,4 +51,4 @@ gbdt_unittest: libgbdt.a gbdt_unittest.cpp
$(CXX) $(CXXFLAGS) -o gbdt_unittest gbdt_unittest.cpp libgbdt.a

clean:
rm $(object_files) $(tests) libgbdt.a ctr gbdt_predict
rm $(object_files) $(tests) libgbdt.a ctr gbdt_predict gbdt_casetool
4 changes: 3 additions & 1 deletion src/cpp/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ class Configure {
Loss loss; // loss type

bool debug; // show debug info?
bool absolute_gain; // absolute_gain of gain, default gain.

double *feature_costs; // mannually set feature costs in order to tune the model
bool enable_feature_tunning; // when set true, `feature_costs' is used to tune the model
Expand All @@ -39,7 +40,8 @@ class Configure {
loss(SQUARED_ERROR),
debug(false),
feature_costs(NULL),
enable_feature_tunning(false) {}
enable_feature_tunning(false),
absolute_gain(false) {}

~Configure() { delete[] feature_costs; }

Expand Down
13 changes: 13 additions & 0 deletions src/cpp/gbdt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,19 @@ ValueType GBDT::Predict(const Tuple &t, size_t n, double *p) const {

return r;
}
// Predict `t` using the first `n` trees, accumulating per-feature gain
// into `p`; `absolute_gain` is forwarded to each tree's Predict overload.
// Returns kUnknownValue when the model holds no trees.
ValueType GBDT::Predict(const Tuple &t, size_t n, double *p, bool absolute_gain) const {
  if (trees == NULL) {
    return kUnknownValue;
  }

  assert(n <= iterations);

  ValueType sum = bias;
  size_t i = 0;
  while (i < n) {
    sum += shrinkage * trees[i].Predict(t, p, absolute_gain);
    ++i;
  }
  return sum;
}

void GBDT::Init(const DataVector &d, size_t len) {
assert(d.size() >= len);
Expand Down
5 changes: 5 additions & 0 deletions src/cpp/gbdt.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ class GBDT {
ValueType Predict(const Tuple &t, double *p) const {
return Predict(t, iterations, p);
}

// Convenience overload: predict with every trained iteration while
// collecting per-feature gain into `p`; `absolute_gain` selects the
// attribution variant.
ValueType Predict(const Tuple &t, double *p, bool absolute_gain) const {
  const ValueType result = Predict(t, iterations, p, absolute_gain);
  return result;
}

std::string Save() const;
void Load(const std::string &s);
Expand All @@ -30,6 +34,7 @@ class GBDT {
private:
ValueType Predict(const Tuple &t, size_t n) const;
ValueType Predict(const Tuple &t, size_t n, double *p) const;
ValueType Predict(const Tuple &t, size_t n, double *p, bool absolute_gain) const;
void Init(const DataVector &d, size_t len);
private:
RegressionTree *trees;
Expand Down
86 changes: 86 additions & 0 deletions src/cpp/gbdt_casetool.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// Author: [email protected] (Yiping Qi)

#include "gbdt.hpp"
#include "fitness.hpp"
#include <fstream>
#include <cassert>
#include <cstring>
#include <iostream>
#include <boost/lexical_cast.hpp>

using namespace gbdt;

// Entry point of gbdt_casetool.
//
// Usage: gbdt_casetool <model_file> <feature_num> <data_file> [logit] [absolute]
//
// Loads a saved GBDT model, runs every tuple of <data_file> through it and
// prints the prediction plus the per-feature gain vector for each tuple.
// The optional 4th argument "logit" maps scores through Logit(); the
// optional 5th argument "absolute" switches on absolute-gain attribution.
int main(int argc, char *argv[]) {
  if (argc < 4) {
    // Original derefed argv[1..3] unconditionally and crashed on short input.
    std::cerr << "usage: " << argv[0]
              << " <model_file> <feature_num> <data_file> [logit] [absolute]"
              << std::endl;
    return 1;
  }

  // Slurp the serialized model file into a string.
  std::string model;
  std::ifstream stream(argv[1]);
  assert(stream);

  stream.seekg(0, std::ios::end);
  model.reserve(static_cast<size_t>(stream.tellg()));  // cast keeps -Wconversion clean
  stream.seekg(0, std::ios::beg);
  model.assign(std::istreambuf_iterator<char>(stream),
               std::istreambuf_iterator<char>());

  GBDT gbdt;
  gbdt.Load(model);

  size_t feature_num = boost::lexical_cast<size_t>(argv[2]);
  g_conf.number_of_feature = feature_num;

  DataVector d;
  LoadDataFromFile(argv[3], &d);

  // Optional 4th argument selects logistic loss; default is squared error.
  Loss loss_type = SQUARED_ERROR;
  if (argc > 4 && std::strcmp(argv[4], "logit") == 0) {
    loss_type = LOG_LIKELIHOOD;
  }
  g_conf.loss = loss_type;

  // Optional 5th argument enables absolute-gain attribution.
  g_conf.absolute_gain = (argc > 5 && std::strcmp(argv[5], "absolute") == 0);

  PredictVector predict;
  double *x = new double[feature_num];  // per-tuple gain scratch buffer
  for (DataVector::iterator iter = d.begin(); iter != d.end(); ++iter) {
    // Reset the gain accumulator before each tuple.
    for (size_t i = 0; i < feature_num; ++i)
      x[i] = 0.0;

    // One call site instead of four duplicated branches: the gain variant
    // and the loss transform are orthogonal choices.
    ValueType p = g_conf.absolute_gain
                      ? gbdt.Predict(**iter, x, g_conf.absolute_gain)
                      : gbdt.Predict(**iter, x);
    if (loss_type == LOG_LIKELIHOOD) {
      p = Logit(p);  // map raw score to probability
    }
    predict.push_back(p);

    std::cout << "tuple: " << (*iter)->ToString() << std::endl
              << "predict: " << p << std::endl;

    std::cout << "x: ";
    for (size_t i = 0; i < feature_num; ++i) {
      std::cout << i << ":" << x[i] << " ";
    }
    std::cout << std::endl;
  }
  delete[] x;  // was leaked in the original

  return 0;
}
41 changes: 41 additions & 0 deletions src/cpp/tree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,44 @@ ValueType RegressionTree::Predict(const Node *root, const Tuple &t, double *p) {
}
}

// Walks `t` down the tree rooted at `root` and returns the leaf prediction.
// Also accumulates into p[f] the gain of the FINAL split on the path:
// |parent.pred - leaf.pred| for the feature f tested at the leaf's parent.
// If a missing-value node has no UNKNOWN child, the walk stops there and no
// gain is recorded.
//
// NOTE(review): `absolute_gain` is accepted for interface symmetry but is
// not consulted anywhere in this implementation — both values yield the
// same attribution. Confirm the intended semantics with the callers.
//
// The previous revision allocated a heap scratch array plus a full path
// stack but only ever read the stack's top element — and leaked the array
// on the early-return path. Tracking the leaf's parent is sufficient and
// allocation-free.
ValueType RegressionTree::Predict(const Node *root, const Tuple &t, double *p, bool absolute_gain) {
  (void) absolute_gain;  // currently unused; see NOTE above

  const Node *parent = NULL;  // last internal node visited on the path
  while (!root->leaf) {
    if (t.feature[root->index] == kUnknownValue) {
      if (!root->child[Node::UNKNOWN]) {
        // No branch for missing values: stop here, record no gain.
        return root->pred;
      }
      parent = root;
      root = root->child[Node::UNKNOWN];
    } else if (t.feature[root->index] < root->value) {
      parent = root;
      root = root->child[Node::LT];
    } else {
      parent = root;
      root = root->child[Node::GE];
    }
  }

  if (parent != NULL) {
    p[parent->index] += fabs(parent->pred - root->pred);
  }
  return root->pred;
}

void RegressionTree::Fit(DataVector *data, size_t len) {
assert(data->size() >= len);
delete root;
Expand All @@ -119,6 +157,9 @@ ValueType RegressionTree::Predict(const Tuple &t) const {
ValueType RegressionTree::Predict(const Tuple &t, double *p) const {
return Predict(root, t, p);
}
// Public wrapper: predict `t` from this tree's root, accumulating the
// per-feature gain into `p`; `absolute_gain` is forwarded unchanged.
ValueType RegressionTree::Predict(const Tuple &t, double *p, bool absolute_gain) const {
  const ValueType leaf_pred = Predict(root, t, p, absolute_gain);
  return leaf_pred;
}

std::string RegressionTree::Save() const {
std::vector<const Node *> nodes;
Expand Down
4 changes: 4 additions & 0 deletions src/cpp/tree.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include "data.hpp"
#include <map>
#include <vector>
#include <stack>

namespace gbdt {
class Node {
Expand Down Expand Up @@ -54,6 +55,8 @@ class RegressionTree {
ValueType Predict(const Tuple &t) const;

ValueType Predict(const Tuple &t, double *p) const;

ValueType Predict(const Tuple &t, double *p, bool absolute_gain) const;

std::string Save() const;
void Load(const std::string &s);
Expand All @@ -74,6 +77,7 @@ class RegressionTree {

static ValueType Predict(const Node *node, const Tuple &t);
static ValueType Predict(const Node *node, const Tuple &t, double *p);
static ValueType Predict(const Node *node, const Tuple &t, double *p, const bool absolute_gain);

static void SaveAux(const Node *node,
std::vector<const Node *> *nodes,
Expand Down