0

mlpack compilation error related to LAPACK

mlpack compilation fails when LAPACK is not found by CMake. One solution is to enable mlpack’s CMakeLists.txt to find LAPACK: add FindLAPACK.cmake to the directory mlpack-1.0.9/CMake so that CMake can locate LAPACK.

Advertisements
0

mlpack: K GMMs fitting by EM algorithm using K-1 GMMs fitting result

// C standard library
#include <time.h>

// C++ standard library
#include <cstdlib>
#include <iostream>
#include <vector>

// mlpack core
#include <mlpack/core.hpp>

// mlpack k-means
#include <mlpack/methods/kmeans/kmeans.hpp>
#include <mlpack/methods/kmeans/allow_empty_clusters.hpp>
#include <mlpack/methods/kmeans/refined_start.hpp>
#include <mlpack/methods/kmeans/max_variance_new_cluster.hpp>

// mlpack GMM
#include <mlpack/methods/gmm/gmm.hpp>
#include <mlpack/methods/gmm/phi.hpp>

#include <mlpack/methods/gmm/no_constraint.hpp>
#include <mlpack/methods/gmm/positive_definite_constraint.hpp>
#include <mlpack/methods/gmm/diagonal_constraint.hpp>
#include <mlpack/methods/gmm/eigenvalue_ratio_constraint.hpp>

int main(int argc, char* argv[])
{
    srand(time(NULL));

    int numDim = 3;
    int numGaussian = 3;
    int numObservation = 1000;
    if(argc > 1)
    {
        str2scalar(argv[1], numObservation);
    }

////////////////////////////////////////////////////////////
// Synthesize ground truth data.
    mlpack::gmm::GMM<> gmm(numGaussian, numDim);
    for(int n = 0; n < numGaussian; ++n)
    {
        gmm.Means()[n] = 10.0*arma::randu<arma::vec>(numDim);
        gmm.Covariances()[n] = arma::eye<arma::mat>(numDim,numDim) + 0.1*arma::randu<arma::mat>(numDim,numDim);
        mlpack::gmm::PositiveDefiniteConstraint::ApplyConstraint(gmm.Covariances()[n]);
    }
    double minWeight = 0.0;
    while(minWeight < (1.0/(double)(numGaussian*5)) )
    {
        gmm.Weights() = arma::randu<arma::vec>(numDim);
        gmm.Weights() /= arma::accu(gmm.Weights());
        minWeight = gmm.Weights().min();
    }
    arma::mat data(numDim, numObservation);
    for(int n = 0; n < numObservation; ++n)
    {
        data.col(n) = gmm.Random();
    }

////////////////////////////////////////////////////////////
// EM algorithm for GMM fitting without initial guess.
    mlpack::gmm::GMM<> gmmCur(numGaussian, numDim);
    clock_t t = clock();
    gmmCur.Estimate(
        data,
        1
    );
    std::cout << "EM algorithm without initial guess: " << (double)(clock()-t)/CLOCKS_PER_SEC << std::endl;
    std::cout << "  EM fitting result without initial guess:" << std::endl;
    for(int n = 0; n < numGaussian; ++n)
    {
        std::cout << "mean[" << n << "] = " << gmmCur.Means()[n].t();
        std::cout << "cov.[" << n << "] = " << gmmCur.Covariances()[n].t();
        std::cout << "wei.[" << n << "] = " << gmmCur.Weights()[n] << std::endl;
    }

////////////////////////////////////////////////////////////
// EM algorithm for GMM fitting with initial guess.
    // step.0: compute K-1 GMM by EM algorithm
    mlpack::gmm::GMM<> gmmPrev(numGaussian-1, numDim);
    gmmPrev.Estimate(
        data,
        1
    );

    // step.1: compute assignment with known K-1 centroids
    t = clock();
    mlpack::kmeans::KMeans<> kmeans; // No overclustering.
    arma::uvec tmp;
    arma::mat centroidPrev = arma::zeros<arma::mat>(numDim,numGaussian-1);
    arma::Col<size_t> assignmentsPrev;
    arma::Col<size_t> clusterCountsPrev = arma::zeros< arma::Col<size_t> >(numGaussian-1);
    for(int n = 0; n < (numGaussian-1); ++n)
    {
        centroidPrev.col(n) = gmmPrev.Means()[n];
    }
    kmeans.Cluster(
        data,
        numGaussian-1,
        assignmentsPrev,
        centroidPrev,
        false,
        true
    );
    tmp = arma::hist(assignmentsPrev, arma::linspace< arma::Col<size_t> >(0,numGaussian-2,numGaussian-1) );
    for(int n = 0; n < (numGaussian-1); ++n)
    {
        clusterCountsPrev(n) = tmp(n);
    }

    // step.2: find K-th centroid
    arma::mat centroidCur = arma::zeros<arma::mat>(numDim,numGaussian);
    arma::Col<size_t> assignmentsCur;
    arma::Col<size_t> clusterCountsCur = arma::zeros< arma::Col<size_t> >(numGaussian);
    for(int n = 0; n < (numGaussian-1); ++n)
    {
        centroidCur.col(n) = centroidPrev.col(n);
        clusterCountsCur(n) = tmp(n);
    }
    size_t numOfChangedPoints = mlpack::kmeans::MaxVarianceNewCluster::EmptyCluster(
        data,
        numGaussian-1,
        centroidCur,
        clusterCountsCur,
        assignmentsPrev
    );
    for(int m = 0; m < numObservation; ++m)
    {
        if( assignmentsPrev(m) == (numGaussian-1) )
        {
            centroidCur.col(numGaussian-1) = data.col(m);
        }
    }

    // step.3: re-compute assignments with K centroids
    kmeans.Cluster(
        data,
        numGaussian,
        assignmentsCur,
        centroidCur,
        false,
        true
    );
    tmp = arma::hist(assignmentsCur, arma::linspace< arma::Col<size_t> >(0,numGaussian-1,numGaussian) );
    for(int n = 0; n < numGaussian; ++n)
    {
        clusterCountsCur(n) = tmp(n);
    }

    // step.4 computes covariance and weight
    mlpack::gmm::GMM<> gmmCur_(numGaussian, numDim); // 3 dimensions, 4 components.
    gmmCur_.Means() = std::vector<arma::vec>(numGaussian, arma::zeros<arma::vec>(numDim));
    gmmCur_.Covariances() = std::vector<arma::mat>(numGaussian, arma::zeros<arma::mat>(numDim,numDim));
    gmmCur_.Weights() = arma::zeros<arma::vec>(numGaussian);
    for(int n = 0; n < numObservation; ++n)
    {
        gmmCur_.Means()[ assignmentsCur[n] ] += data.col(n);
    }
    for(int n = 0; n < numGaussian; ++n)
    {
        gmmCur_.Means()[n] /= (clusterCountsCur[n] > 1) ? (double)clusterCountsCur[n] : 1.0;
        gmmCur_.Weights()(n) = (double)clusterCountsCur(n) / (double)numObservation;
    }
    size_t cluster;
    arma::vec vecDiff;
    for(int n = 0; n < numObservation; ++n)
    {
        cluster = assignmentsCur[n];
        vecDiff = data.col(n) - gmmCur_.Means()[cluster];
        gmmCur_.Covariances()[cluster] += vecDiff * vecDiff.t();
    }
    for(int n = 0; n < numGaussian; ++n)
    {
        gmmCur_.Covariances()[n] /= (clusterCountsCur[n] >1) ? (double)clusterCountsCur[n] : 1.0;
        mlpack::gmm::PositiveDefiniteConstraint::ApplyConstraint(gmmCur_.Covariances()[n]);
    }

    // step.5 EM algorithm with known means, covariances, and weights
    gmmCur_.Estimate(
        data,
        1,
        true
    );

    std::cout << "EM algorithm with initial guess: " << (double)(clock()-t)/CLOCKS_PER_SEC << std::endl;
    std::cout << "  EM fitting result with initial guess:" << std::endl;
    for(int n = 0; n < numGaussian; ++n)
    {
        std::cout << "mean[" << n << "] = " << gmmCur_.Means()[n].t();
        std::cout << "cov.[" << n << "] = " << gmmCur_.Covariances()[n].t();
        std::cout << "wei.[" << n << "] = " << gmmCur_.Weights()[n] << std::endl;
    }

    std::cout << "  Ground truth:" << std::endl;
    for(int n = 0; n < numGaussian; ++n)
    {
        std::cout << "mean[" << n << "] = " << gmm.Means()[n].t();
        std::cout << "cov.[" << n << "] = " << gmm.Covariances()[n].t();
        std::cout << "wei.[" << n << "] = " << gmm.Weights()[n] << std::endl;
    }

    return 0;
}
0

MLPack: generate random data from GMM

// Build a random K-component GMM and draw numObs samples from it
// (one sample per column of armaData).
int numGauss = 3;  // number of mixture components
int numDim = 3;    // dimensionality of each Gaussian
int numObs = 1000; // number of samples to draw
mlpack::gmm::GMM<> gmm(numGauss,numDim);
arma::mat covar(numDim, numDim);

// GenerateWeight() is presumably a helper returning a normalized weight
// vector whose smallest entry is at least 1/(2*numGauss) -- TODO confirm
// against its definition (it is not shown in this post).
gmm.Weights() = GenerateWeight(numGauss, 1.0/((double)numGauss*2.0));
// set the k-th component's mean and covariance
for(int k = 0; k < numGauss; ++k)
{
    // BUGFIX: this line was HTML-entity-garbled ("&amp;lt;"); the template
    // brackets are restored. Means lie in [0,255)^numDim (pixel range).
    gmm.Means()[k] = 255.0*arma::randu<arma::vec>(numDim);
    // Covariance = I + small noise, symmetrized via symmatu.
    covar = arma::eye<arma::mat>(numDim, numDim);
    covar += 0.2*arma::randu<arma::mat>(numDim, numDim);
    gmm.Covariances()[k] = arma::symmatu(covar);
}
// BUGFIX: armaData was used without a declaration; declare it here.
arma::mat armaData = arma::zeros(numDim, numObs);
for(int i = 0; i < numObs; ++i)
{
    armaData.col(i) = gmm.Random();
}
0

mlpack: GMM fitting

mlpack::gmm::GMM::Estimate() estimates the probability distribution directly from the given observations, using the given algorithm in the FittingType class to fit the data. The fitting will be performed ‘trials’ times; from these trials, the model with the greatest log-likelihood will be selected. By default, only one trial is performed. The log-likelihood of the best fitting is returned.

template<typename FittingType = EMFit<>>
double mlpack::gmm::GMM< FittingType >::Estimate(
 	const arma::mat &  	observations,   // Observations of the model.
	const size_t trials = 1,            // Number of trials to perform; the model in these trials with the greatest log-likelihood will be selected.
	const bool useExistingModel = false	// If true, the existing model is used as an initial model for the estimation.  
)

The overload below additionally takes into account the probability of each observation actually being from this distribution.

template<typename FittingType = EMFit<>>
double mlpack::gmm::GMM< FittingType >::Estimate(
 	const arma::mat &  	observations,   // Observations of the model.
	const arma::vec &  	probabilities,  // Probability of each observation being from this distribution.
	const size_t trials = 1,            // Number of trials to perform; the model in these trials with the greatest log-likelihood will be selected.
	const bool useExistingModel = false // If true, the existing model is used as an initial model for the estimation.	 
) 	
0

mlpack: create Gaussian Mixture Model

mlpack::gmm::GMM creates a Gaussian Mixture Model with different constructors.

Create an empty Gaussian Mixture Model, with zero gaussians.

template<typename FittingType = EMFit<>>
mlpack::gmm::GMM< FittingType >::GMM()

Create a GMM with the given number of Gaussians, each of which have the specified dimensionality.

template<typename FittingType = EMFit<>>
mlpack::gmm::GMM< FittingType >::GMM(
const size_t      gaussians,     // Number of Gaussians in this GMM.
const size_t      dimensionality // Dimensionality of each Gaussian.
)
// Example usage
// create a GMM with 3 6D Gaussians
size_t gaussians = 3;
size_t dimensionality = 6;
mlpack::gmm::GMM<> gmm(gaussians,dimensionality);

Create a GMM with the given number of Gaussians, each of which have the specified dimensionality.
Also, pass in an initialized FittingType class; this is useful in cases where the FittingType class needs to store some state.

template<typename FittingType = EMFit<>>
mlpack::gmm::GMM< FittingType >::GMM(
const size_t      gaussians,      // Number of Gaussians in this GMM.
const size_t      dimensionality, // Dimensionality of each Gaussian.
FittingType &      fitter            // Initialized fitting mechanism.
)

Create a GMM with the given means, covariances, and weights.

template<typename FittingType = EMFit<>>
mlpack::gmm::GMM< FittingType >::GMM(
const std::vector< arma::vec > & means,       // Means of the model.
const std::vector< arma::mat > & covariances, // Covariances of the model.
const arma::vec &                   weights      // Weights of the model.
)

Create a GMM with the given means, covariances, and weights, and use the given initialized FittingType class.
This is useful in cases where the FittingType class needs to store some state.

template<typename FittingType = EMFit<>>
mlpack::gmm::GMM< FittingType >::GMM(
const std::vector< arma::vec > & means,       // Means of the model.
const std::vector< arma::mat > & covariances, // Covariances of the model.
const arma::vec &                   weights,     // Weights of the model.
FittingType &                       fitter
)
0

mlpack: how to compute Gaussian probability density function

mlpack::gmm::phi() computes univariate/multivariate Gaussian probability density functions.

probability of a univariate Gaussian.
double mlpack::gmm::phi(
    const double x,    // Observation.
    const double mean, // Mean of univariate Gaussian.
    const double var   // Variance of univariate Gaussian.
) // returns Probability of x being observed from the given univariate Gaussian.
// Example usage
    double x, mean, var;
    ....
    double f = phi(x, mean, var);

probability of a multivariate Gaussian.

double mlpack::gmm::phi(
     const arma::vec &      x,    // Observation.
    const arma::vec &      mean, // Mean of multivariate Gaussian.
    const arma::mat &      cov      // Covariance of multivariate Gaussian.
) // returns Probability of x being observed from the given multivariate Gaussian.
// Example usage
    arma::vec x, mean;
    arma::mat cov;
    ....
    double f = phi(x, mean, cov);

a set of probabilities of a multivariate Gaussian.

void mlpack::gmm::phi(
     const arma::mat &      x,            // List of observations.
    const arma::vec &      mean,         // Mean of multivariate Gaussian.
    const arma::mat &      cov,          // Covariance of multivariate Gaussian.
    arma::vec &      probabilities    // Output probabilities for each input observation.
) // Calculates the multivariate Gaussian probability density function for each data point (column) in the given matrix, with respect to the given mean and variance.

probability of a multivariate Gaussian and its gradients

double mlpack::gmm::phi(
     const arma::vec &      x,    // Observation.
    const arma::vec &      mean, // Mean of multivariate Gaussian.
    const arma::mat &      cov,  // Covariance of multivariate Gaussian.
    const std::vector< arma::mat > & d_cov,  //
    arma::vec &                         g_mean, // gradients w.r.t. the mean
    arma::vec &                         g_cov     // gradients w.r.t. the covariance
) // Calculates the multivariate Gaussian probability density function and also the gradients with respect to the mean and the variance.
// Example usage
    arma::vec x, mean, g_mean, g_cov;
    arma::mat cov;
    std::vector<arma::mat> d_cov; // the dSigma
    ....
    double f = phi(x, mean, cov, d_cov, g_mean, g_cov); // g_mean/g_cov are reference parameters, not pointers
0

mlpack installation on Ubuntu 12.04

mlpack is a C++ machine learning library developed by the Fundamental Algorithmic and Statistical Tools Laboratory (FASTLab) at Georgia Tech. To build it, mlpack requires the prerequisites listed below.

mlpack requires

0. Prerequisities
0.1. BLAS

  1. sudo apt-get install libblas-dev

0.2. LAPACK
Armadillo requires LAPACK to be compiled with gfortran.
#Lapack with gfortran

  1. wget http://www.netlib.org/lapack/lapack-3.5.0.tgz
  2. tar xzf lapack-3.5.0.tgz
  3. cd lapack-3.5.0/
  4. cmake-gui .
  5. # set CMAKE_Fortran_COMPILER = /usr/bin/gfortran for Armadillo
  6. # set CMAKE_Fortran_FLAGS = -fPIC
  7. make
  8. sudo make install

0.3 Armadillo >= 3.6.0 (with LAPACK support)

  1. wget http://downloads.sourceforge.net/project/arma/armadillo-3.930.2.tar.gz
  2. tar xzf armadillo-3.930.2.tar.gz
  3. cd armadillo-3.930.2
  4. cmake-gui .
  5. make
  6. sudo make install

0.4 LibXML2 >= 2.6.0

  1. sudo apt-get -yV install libxml2-dev

1. MLpack

  1. sudo apt-get -yV install txt2man
  2. wget http://www.mlpack.org/files/mlpack-1.0.7.tar.gz
  3. tar xzf mlpack-1.0.7.tar.gz
  4. mkdir mlpack-1.0.7-build
  5. cd mlpack-1.0.7-build
  6. cmake-gui ../mlpack-1.0.7
  7. make
  8. sudo make install
  9. sudo ldconfig