8 #include <boost/algorithm/string.hpp>
22 int main(
int argc,
char **argv)
26 time_t starttime, endtime;
31 std::cout <<
"Usage: input n k d space output projections [seed]" << std::endl;
32 std::cout <<
" input = path to input file" << std::endl;
33 std::cout <<
" n = number of input points" << std::endl;
34 std::cout <<
" k = number of desired centers" << std::endl;
35 std::cout <<
" d = dimension of an input point" << std::endl;
36 std::cout <<
" space = coreset size" << std::endl;
37 std::cout <<
" projections = number of random projections used for nearest neighbour search" << std::endl;
38 std::cout <<
" in first level" << std::endl;
39 std::cout <<
" seed = random seed (optional)" << std::endl;
40 std::cout << std::endl;
41 std::cout <<
"7 arguments expected, got " << argc - 1 <<
":" << std::endl;
42 for (
int i = 1; i < argc; ++i)
43 std::cout << i <<
".: " << argv[i] << std::endl;
48 std::ifstream filestream(argv[1], std::ifstream::in);
49 int n = atoi(argv[2]);
50 int k = atoi(argv[3]);
51 int d = atoi(argv[4]);
52 int space = atoi(argv[5]);
53 std::ofstream outputstream(argv[6], std::ifstream::out);
54 int p = atoi(argv[7]);
64 while (filestream.good())
68 std::getline(filestream, line);
69 std::vector<std::string> stringcoords;
70 boost::split(stringcoords, line, boost::is_any_of(
" "));
72 std::vector<double> coords;
73 coords.reserve(stringcoords.size());
74 for (
size_t i = 0; i < stringcoords.size(); ++i)
75 coords.push_back(atof(stringcoords[i].c_str()));
80 std::clog <<
"Line skipped because line dimension is " << p.
dimension() <<
" instead of " << d << std::endl;
92 outputstream << sol->proxysets[0].size() <<
"\n";
95 for (
size_t i = 0; i < sol->proxysets[0].size(); ++i)
98 outputstream << sol->proxysets[0][i].getWeight() <<
" ";
100 for (
size_t j = 0; j < sol->proxysets[0][i].dimension(); ++j)
102 outputstream << sol->proxysets[0][i][j];
103 if (j < sol->proxysets[0][i].dimension() - 1)
106 outputstream <<
"\n";
108 outputstream.close();
static void initialize(uint_fast32_t seed)
Squared L2 metric for Point.
int main(int argc, char **argv)
Fast computation of k-means coresets in a data stream.
Modifies the weight of a Point.
Data structure for proxies.
Weighted point of arbitrary dimension.
virtual ProxySolution< T > * compute()
Returns a coreset of all points read so far.