BICO  1.1
 All Classes Namespaces Files Functions Variables Typedefs Pages
kmeansevaluator.h
Go to the documentation of this file.
1 #ifndef KMEANSEVALUATOR_H
2 #define KMEANSEVALUATOR_H
3 
4 #include "../base/measuresetter.h"
5 #include "../base/partitionprovider.h"
6 #include "../base/proxyevaluation.h"
7 #include "../base/combinedevaluation.h"
8 #include "../base/dissimilaritymeasure.h"
9 
10 namespace CluE
11 {
12 
13 //TODO check correct use of either "std::vector<T> proxies" or "std::vector<T*> proxies", use combinedcost in proxycost(?)
14 
// Evaluator for the k-means weight of a clustering. The cost functions defined below sum the
// dissimilarities (from the configured DissimilarityMeasure) between points and proxies,
// each multiplied by the point's weight when a weight modifier is set.
// NOTE(review): this listing omits some declarations (constructors, operator=,
// setWeightModifier, data members) that the out-of-class definitions rely on.
21 template<typename T> class KMeansEvaluator : public ProxyEvaluation<T>, public CombinedEvaluation<T>, public MeasureSetter<T>
22 {
23 public:
24 
31 
 // Virtual destructor.
34  virtual ~KMeansEvaluator();
35 
 // Sets the DissimilarityMeasure used by the cost computations.
39  virtual void setMeasure(DissimilarityMeasure<T> const *measure);
40 
42 
43 //# ProxyEvaluation members
 // Assigns every point to its least-dissimilar proxy and returns the summed cost over all proxies.
49  virtual double proxycost(std::vector<T*> const& points, std::vector<T> const& proxies) const;
50 
 // Same as above, with the proxies given as pointers.
54  virtual double proxycost(std::vector<T*> const& points, std::vector<T*> const& proxies) const;
55 
 // Same as above, using proxySource.proxies(solutionIndex) as the proxies.
61  virtual double proxycost(std::vector<T*> const& points, ProxyProvider<T> const &proxySource, unsigned int solutionIndex) const;
62 
 // Same as above, using proxySource.discrete_proxies(solutionIndex) as the proxies.
66  virtual double proxycost(std::vector<T*> const& points, DiscreteProxyProvider<T> const &proxySource, unsigned int solutionIndex) const;
67 
68 
 // Assigns every point to its least-dissimilar proxy and returns only the cost accumulated
 // at the proxy with the given index.
74  virtual double proxycost(std::vector<T*> const& points, std::vector<T> const& proxies, unsigned int index) const;
75 
 // Same as above, with the proxies given as pointers.
79  virtual double proxycost(std::vector<T*> const& points, std::vector<T*> const& proxies, unsigned int index) const;
80 
 // Same as above, using proxySource.proxies(solutionIndex) and selecting proxyIndex.
86  virtual double proxycost(std::vector<T*> const& points, ProxyProvider<T> const &proxySource, unsigned int solutionIndex, unsigned int proxyIndex) const;
87 
 // Same as above, using proxySource.discrete_proxies(solutionIndex) and selecting proxyIndex.
91  virtual double proxycost(std::vector<T*> const& points, DiscreteProxyProvider<T> const &proxySource, unsigned int solutionIndex, unsigned int proxyIndex) const;
92 
93 //# CombinedEvaluation members
 // Sums the cost of cluster i against proxy i. Surplus proxies are ignored with a warning;
 // surplus clusters are assigned to their least-dissimilar proxies via proxycost().
98  virtual double combinedcost(std::vector<std::vector<T*> > const& clusters, std::vector<T> const& proxies) const;
99 
 // Same as above, with the proxies given as pointers.
103  virtual double combinedcost(std::vector<std::vector<T*> > const& clusters, std::vector<T*> const& proxies) const;
104 
105 
 // Same as above, for clusteringSource.clustering(solutionIndex) and proxySource.proxies(solutionIndex).
110  virtual double combinedcost(PartitionProvider<T> const &clusteringSource, ProxyProvider<T> const &proxySource, unsigned int solutionIndex) const;
111 
 // Same as above, for clusteringSource.clustering(solutionIndex) and proxySource.discrete_proxies(solutionIndex).
115  virtual double combinedcost(PartitionProvider<T> const &clusteringSource, DiscreteProxyProvider<T> const &proxySource, unsigned int solutionIndex) const;
116 
117 
 // Returns the summed (optionally weighted) dissimilarity of all points in one cluster to one proxy.
122  virtual double combinedcost(std::vector<T*> const& cluster, T const& proxy) const;
123 
 // Single-cluster cost for clusteringSource.cluster(solutionIndex, proxyIndex) against
 // proxySource.proxy(solutionIndex, proxyIndex).
128  virtual double combinedcost(PartitionProvider<T> const &clusteringSource, ProxyProvider<T> const &proxySource, unsigned int solutionIndex, unsigned int proxyIndex) const;
129 
 // Single-cluster cost for clusteringSource.cluster(solutionIndex, proxyIndex) against
 // proxySource.discrete_proxy(solutionIndex, proxyIndex).
133  virtual double combinedcost(PartitionProvider<T> const &clusteringSource, DiscreteProxyProvider<T> const &proxySource, unsigned int solutionIndex, unsigned int proxyIndex) const;
134 
135 protected:
 // Returns one cost value per proxy: each point contributes its (optionally weighted)
 // dissimilarity to the proxy it is least dissimilar to.
139  std::vector<double> proxycostGeneric(std::vector<T*> const& points, std::vector<T> const& proxies) const;
140 
 // Same as above, with the proxies given as pointers.
144  std::vector<double> proxycostGeneric(std::vector<T*> const& points, std::vector<T*> const& proxies) const;
145 
146 private:
149 };
150 
151 template<typename T> KMeansEvaluator<T>::KMeansEvaluator(DissimilarityMeasure<T> const *measure) :
152  measure(measure==0 ? 0 : measure->clone()), weightModifier(0)
153 {
154  //empty
155 }
156 
157 template<typename T> KMeansEvaluator<T>::KMeansEvaluator(const KMeansEvaluator<T>& kme) :
158  measure(kme.measure == 0 ? 0 : kme.measure->clone()), weightModifier(0)
159 {
160 }
161 
163 {
 // NOTE(review): the signature line is not visible in this listing; the use of `kme` and
 // `return *this` suggest this is the body of operator=(const KMeansEvaluator<T>& kme) - confirm.
 // Release the currently held measure and weight modifier.
164  if(measure != 0)
165  delete measure;
166  if(weightModifier != 0)
167  delete weightModifier;
168 
171 
 // NOTE(review): no self-assignment guard is visible here; if kme were *this, the clones
 // below would read the objects deleted above - verify against the complete source.
 // Store fresh clones of kme's measure and weight modifier (or 0 where kme holds none).
172  measure = kme.measure == 0 ? 0 : kme.measure->clone();
173  weightModifier = kme.weightModifier == 0 ? 0 : kme.weightModifier->clone();
174 
175  return *this;
176 }
177 
179 {
 // NOTE(review): the signature line is not visible in this listing; presumably this is the
 // destructor body - confirm.
 // Delete the held measure and weight modifier, if any.
180  if(measure != 0)
181  delete measure;
182  if(weightModifier != 0)
183  delete weightModifier;
184 }
185 
186 //### ProxyEvaluation members
187 
188 template<typename T> double KMeansEvaluator<T>::proxycost(std::vector<T*> const& points, std::vector<T> const& proxies) const
189 {
190  std::vector<double> values = proxycostGeneric(points, proxies);
191  int numOfValues = values.size();
192 
193  double result = 0;
194  for(int i = 0; i < numOfValues; i++)
195  {
196  result += values[i];
197  }
198 
199  return result;
200 }
201 
202 template<typename T> double KMeansEvaluator<T>::proxycost(std::vector<T*> const& points, std::vector<T*> const& proxies) const
203 {
204  std::vector<double> values = proxycostGeneric(points, proxies);
205  int numOfValues = values.size();
206 
207  double result = 0;
208  for(int i = 0; i < numOfValues; i++)
209  {
210  result += values[i];
211  }
212 
213  return result;
214 }
215 
216 template<typename T> double KMeansEvaluator<T>::proxycost(std::vector<T*> const& points, ProxyProvider<T> const &provider, unsigned int solutionIndex) const
217 {
218  return proxycost(points, provider.proxies(solutionIndex));
219 }
220 
221 template<typename T> double KMeansEvaluator<T>::proxycost(std::vector<T*> const& points, DiscreteProxyProvider<T> const &provider, unsigned int solutionIndex) const
222 {
223  return proxycost(points, provider.discrete_proxies(solutionIndex));
224 }
225 
226 
227 template<typename T> double KMeansEvaluator<T>::proxycost(std::vector<T*> const& points, std::vector<T> const& proxies, unsigned int index) const
228 {
229  std::vector<double> values = proxycostGeneric(points, proxies);
230  return values.at(index);
231 }
232 
233 template<typename T> double KMeansEvaluator<T>::proxycost(std::vector<T*> const& points, std::vector<T*> const& proxies, unsigned int index) const
234 {
235  std::vector<double> values = proxycostGeneric(points, proxies);
236  return values.at(index);
237 }
238 
239 template<typename T> double KMeansEvaluator<T>::proxycost(std::vector<T*> const& points, ProxyProvider<T> const &provider, unsigned int solutionIndex, unsigned int proxyIndex) const
240 {
241  return proxycost(points, provider.proxies(solutionIndex), proxyIndex);
242 }
243 
244 template<typename T> double KMeansEvaluator<T>::proxycost(std::vector<T*> const& points, DiscreteProxyProvider<T> const &provider, unsigned int solutionIndex, unsigned int proxyIndex) const
245 {
246  return proxycost(points, provider.discrete_proxies(solutionIndex), proxyIndex);
247 }
248 
249 template<typename T> void KMeansEvaluator<T>::setMeasure(DissimilarityMeasure<T> const *measure)
250 {
251  if(measure)
252  this->measure = 0;
253  else
254  this->measure = measure->clone();
255 }
256 
257 
258 //TODO handle numOfProxies==0
259 template<typename T> std::vector<double> KMeansEvaluator<T>::proxycostGeneric(std::vector<T*> const& points, std::vector<T> const& proxies) const
260 {
261  int numOfPoints = points.size();
262  int numOfProxies = proxies.size();
263 
264  std::vector<double> result(numOfProxies, 0);
265 
266  for(int i = 0; i < numOfPoints; i++)
267  {
268  T* point = points[i];
269 
270  double min = this->measure->dissimilarity(*point, proxies[0]);
271  int assignedProxy = 0;
272  for(int j = 1; j < numOfProxies; j++)
273  {
274  T proxy = proxies[j];
275  double candidate = this->measure->dissimilarity(*point, proxy);
276  if(candidate < min)
277  {
278  min = candidate;
279  assignedProxy = j;
280  }
281  }
282  if(weightModifier != 0)
283  min *= weightModifier->getWeight(*point);
284  result[assignedProxy] += min;
285  }
286 
287  return result;
288 }
289 
290 //TODO handle numOfProxies==0
291 template<typename T> std::vector<double> KMeansEvaluator<T>::proxycostGeneric(std::vector<T*> const& points, std::vector<T*> const& proxies) const
292 {
293  int numOfPoints = points.size();
294  int numOfProxies = proxies.size();
295 
296  std::vector<double> result(numOfProxies, 0);
297 
298  for(int i = 0; i < numOfPoints; i++)
299  {
300  T* point = points[i];
301 
302  double min = this->measure->dissimilarity(*point, *proxies[0]);
303  int assignedProxy = 0;
304  for(int j = 1; j < numOfProxies; j++)
305  {
306  T* proxy = proxies[j];
307  double candidate = this->measure->dissimilarity(*point, *proxy);
308  if(candidate < min)
309  {
310  min = candidate;
311  assignedProxy = j;
312  }
313  }
314  if(weightModifier != 0)
315  min *= weightModifier->getWeight(*point);
316  result[assignedProxy] += min;
317  }
318 
319  return result;
320 }
321 
322 //### CombinedEvaluation members
323 
324 template<typename T> double KMeansEvaluator<T>::combinedcost(std::vector<std::vector<T*> > const& clusters, std::vector<T> const& proxies) const
325 {
326  double sum = 0;
327 
328  int numOfClusters = clusters.size();
329  int numOfProxies = proxies.size();
330  int minNumOfClustersProxies = numOfClusters < numOfProxies ? numOfClusters : numOfProxies;
331 
332  for(int i = 0; i < minNumOfClustersProxies; i++)
333  {
334  sum += combinedcost(clusters[i], proxies[i]);
335  }
336 
337  if(numOfClusters < numOfProxies)
338  {
339  std::clog << "CluE::KMeansEvaluator<T>::combinedcost(std::vector<std::vector<T*> >, std::vector<T>) - WARNING: More proxies than clusters: ignoring redundant proxies." << std::endl;
340  }
341  else if(numOfClusters > numOfProxies)
342  {
343  std::clog << "CluE::KMeansEvaluator<T>::combinedcost(std::vector<std::vector<T*> >, std::vector<T>) - WARNING: Less proxies than clusters: assigning remaining points to proxies." << std::endl;
344  for(int i = numOfProxies; i < numOfClusters; i++)
345  {
346  sum += proxycost(clusters[i], proxies);
347  }
348  }
349 
350  return sum;
351 }
352 
353 template<typename T> double KMeansEvaluator<T>::combinedcost(std::vector<std::vector<T*> > const& clusters, std::vector<T*> const& proxies) const
354 {
355  double sum = 0;
356 
357  int numOfClusters = clusters.size();
358  int numOfProxies = proxies.size();
359  int minNumOfClustersProxies = numOfClusters < numOfProxies ? numOfClusters : numOfProxies;
360 
361  for(int i = 0; i < minNumOfClustersProxies; i++)
362  {
363  sum += CombinedEvaluation<T>::combinedcost(clusters[i], proxies[i]);
364  }
365 
366  if(numOfClusters < numOfProxies)
367  {
368  std::clog << "CluE::KMeansEvaluator<T>::combinedcost(std::vector<std::vector<T*> >, std::vector<T*>) - WARNING: More proxies than clusters: ignoring redundant proxies." << std::endl;
369  }
370  else if(numOfClusters > numOfProxies)
371  {
372  std::clog << "CluE::KMeansEvaluator<T>::combinedcost(std::vector<std::vector<T*> >, std::vector<T*>) - WARNING: Less proxies than clusters: assigning remaining points to proxies." << std::endl;
373  for(int i = numOfProxies; i < numOfClusters; i++)
374  {
375  sum += proxycost(clusters[i], proxies);
376  }
377  }
378 
379  return sum;
380 }
381 
382 template<typename T> double KMeansEvaluator<T>::combinedcost(PartitionProvider<T> const &clusterProvider, ProxyProvider<T> const &proxyProvider, unsigned int solutionIndex) const
383 {
384  return combinedcost(clusterProvider.clustering(solutionIndex), proxyProvider.proxies(solutionIndex));
385 }
386 
387 template<typename T> double KMeansEvaluator<T>::combinedcost(PartitionProvider<T> const &clusterProvider, DiscreteProxyProvider<T> const &proxyProvider, unsigned int solutionIndex) const
388 {
389  return combinedcost(clusterProvider.clustering(solutionIndex), proxyProvider.discrete_proxies(solutionIndex));
390 }
391 
392 template<typename T> double KMeansEvaluator<T>::combinedcost(std::vector<T*> const& cluster, T const& proxy) const
393 {
394  double sum = 0;
395 
396  int numOfPoints = cluster.size();
397  for(int i = 0; i < numOfPoints; i++)
398  {
399  double dist = this->measure->dissimilarity(*cluster[i], proxy);
400  if(weightModifier != 0)
401  dist *= weightModifier->getWeight(*cluster[i]);
402  sum += dist;
403  }
404 
405  return sum;
406 }
407 
408 template<typename T> double KMeansEvaluator<T>::combinedcost(PartitionProvider<T> const &clusterProvider, ProxyProvider<T> const &proxyProvider, unsigned int solutionIndex, unsigned int proxyIndex) const
409 {
410  return combinedcost(clusterProvider.cluster(solutionIndex, proxyIndex), proxyProvider.proxy(solutionIndex, proxyIndex));
411 }
412 
413 template<typename T> double KMeansEvaluator<T>::combinedcost(PartitionProvider<T> const &clusterProvider, DiscreteProxyProvider<T> const &proxyProvider, unsigned int solutionIndex, unsigned int proxyIndex) const
414 {
415  return CombinedEvaluation<T>::combinedcost(clusterProvider.cluster(solutionIndex, proxyIndex), proxyProvider.discrete_proxy(solutionIndex, proxyIndex));
416 }
417 
419 {
420  if(wm != 0)
421  weightModifier = wm->clone();
422  else
423  wm = 0;
424 }
425 
426 }
427 
428 #endif
virtual T * discrete_proxy(unsigned int solutionIndex, unsigned int proxyIndex) const =0
Returns a pointer to the proxy for the specified clustering and cluster.
Abstract base class to access the results of proxy / center based clustering algorithms.
Abstract base class to access results of proxy / center based clustering algorithms.
Definition: proxyprovider.h:14
virtual WeightModifier< T > * clone() const =0
Makes an exact copy of this object. The clone method creates a copy of this object and returns a pointe...
Calculates the k-means weight.
virtual double proxycost(std::vector< T * > const &points, std::vector< T > const &proxies) const
Assigns all points to a proxy and calculates the k-means weight of the resulting clustering.
void setWeightModifier(WeightModifier< T > *wm)
virtual std::vector< T * > cluster(unsigned int solutionIndex, unsigned int partitionIndex) const =0
Returns a vector of pointers to the elements of a particular cluster from the specified clustering...
Abstract class. Proxy based evaluation algorithms may be optimized by deriving from this class...
virtual double combinedcost(std::vector< std::vector< T * > > const &clusters, std::vector< T > const &proxies) const
Calculates the k-means weight of a given clustering.
Abstract base class to modify the weight of weighted objects.
virtual double combinedcost(std::vector< std::vector< T * > > const &partitioning, std::vector< T > const &proxies) const =0
Calculates the cost related to the proxies, based on the partitions.
std::vector< double > proxycostGeneric(std::vector< T * > const &points, std::vector< T > const &proxies) const
Provides a k-means weight result per cluster (may be added, chosen from, ...).
virtual T proxy(unsigned int solutionIndex, unsigned int proxyIndex) const =0
returns the proxy for the specified clustering and cluster
KMeansEvaluator(DissimilarityMeasure< T > const *measure=0)
Instantiates KMeansEvaluator, optionally with a DissimilarityMeasure to use when calculating the k-means weight.
virtual DissimilarityMeasure< T > * clone() const =0
virtual std::vector< std::vector< T * > > clustering(unsigned int solutionIndex) const =0
Returns the specified clustering as a vector of vector of pointers to the elements.
virtual std::vector< T * > discrete_proxies(unsigned int solutionIndex) const =0
Returns a vector of pointers to the proxies for the specified clustering.
WeightModifier< T > * weightModifier
KMeansEvaluator< T > & operator=(const KMeansEvaluator< T > &)
Interface to propagate the ability to set a DissimilarityMeasure.
Definition: measuresetter.h:13
virtual void setMeasure(DissimilarityMeasure< T > const *measure)
Sets the DissimilarityMeasure used when calculating the k-means weight.
Abstract class for proxy-based evaluation algorithms.
Abstract base class to access results of partition based clustering algorithms.
virtual std::vector< T > proxies(unsigned int solutionIndex) const =0
returns the proxies for the specified clustering
Abstract base class for dissimilarity measurement.
DissimilarityMeasure< T > * measure