chengxr
/
QFD2


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172
							#include "PCA.h"

#include <QDebug>
#include <iostream>

using namespace Eigen;

/**
    //    QVector<QVector<double>> pdata = {
    //        { 4.1, 0.04, 0.19, 0.01, 0.36, 0.05 }, { 3.9, 0.08, 0.38, 0.18, 0.19, 0.04 },
    //        { 5.9, 0.07, 0.23, 0.04, 0.36, 0.07 }, { 4.9, 0.23, 1.03, 0.3, 1.4, 0.09 },
    //        { 4, 0.14, 0.57, 0.11, 1.12, 0.05 },   { 6.1, 0.23, 1.19, 0.47, 0.93, 0.07 },
    //        { 8.2, 0.21, 0.88, 0.2, 0.97, 0.1 },   { 6.5, 0.13, 0.8, 0.26, 0.52, 0.09 },
    //        { 7.5, 0.16, 1.28, 0.88, 0.8, 0.12 },  { 6.8, 0.04, 0.17, 0.02, 0.24, 0.11 },
    //        { 7.4, 0.05, 0.33, 0.06, 0.39, 0.1 },  { 5.7, 0.08, 0.6, 0.47, 0.46, 0.08 },
    //        { 2.4, 0.14, 0.23, 0.04, 0.94, 0.02 }, { 2.2, 0.24, 0.35, 0.06, 1.78, 0.02 },
    //        { 1.9, 0.11, 0.3, 0.1, 0.5, 0.04 },    { 1.8, 0.06, 0.28, 0.1, 0.29, 0.03 },
    //        { 15.9, 0.2, 1.05, 0.21, 1.84, 0.12 }, { 18.9, 0.09, 0.82, 0.34, 0.58, 0.36 },
    //        { 18.8, 0.12, 0.8, 0.39, 0.76, 0.26 }, { 16.8, 0.27, 1.74, 1.51, 1.78, 0.32 },
    //    };

    QVector<QVector<double>> pdata = {
        { 66, 64, 65, 65, 65 }, { 65, 63, 63, 65, 64 }, { 57, 58, 63, 59, 66 }, { 67, 69, 65, 68, 64 },
        { 61, 61, 62, 62, 63 }, { 64, 65, 63, 63, 63 }, { 64, 63, 63, 63, 64 }, { 63, 63, 63, 63, 63 },
        { 65, 64, 65, 66, 64 }, { 67, 69, 69, 68, 67 }, { 62, 63, 65, 64, 64 }, { 68, 67, 65, 67, 65 },
        { 65, 65, 66, 65, 64 }, { 62, 63, 64, 62, 66 }, { 64, 66, 66, 65, 67 },
    };

    PCA pca(pdata, 0.0);
    pca.compute();
  */

// 把输入的数据矩阵中心化并返回
static void featurenormalize(MatrixXd &X)
{
    // 计算每一维度均值
    VectorXd meanval = X.colwise().mean();
    // 计算每一标准差
    VectorXd stdval = ((X.rowwise() - meanval.transpose()).array().square().colwise().sum() / (X.rows() - 1)).sqrt();

    // 数据进行标准化
    X = (X.rowwise() - meanval.transpose()).array().rowwise() / stdval.transpose().array();
}

// 计算数据矩阵X 的协方差矩阵C
static void computeCov(MatrixXd &X, MatrixXd &C)
{
    // 计算协方差矩阵C = X^T * X / (n-1);
    C = X.adjoint() * X;
    C = C.array() / (X.rows() - 1);
}

// 计算协方差矩阵C的特征值val和特征向量vec
static void computeEig(MatrixXd &C, MatrixXd &vec, MatrixXd &val)
{
    // 计算特征值和特征向量，使用selfadjont按照对阵矩阵的算法去计算，可以让产生的vec和val按照有序排列
    SelfAdjointEigenSolver<MatrixXd> eig(C);

    vec = eig.eigenvectors();
    val = eig.eigenvalues();
}

// 计算压缩成多少维数据损失小于thd
static int computeDim(MatrixXd &val, double thd)
{
    int dim;
    double sum = 0;
    for (int i = val.rows() - 1; i >= 0; --i) {
        sum += val(i, 0);
        dim = i;

        if (sum / val.sum() >= thd)
            break;
    }
    return val.rows() - dim;
}

PCA::PCA(const QVector<QVector<double>> &source, double thd) : variance_remain_(thd)
{
    rows_ = source.size();        // 行数
    cols_ = source.at(0).size();  // 列数
    X     = MatrixXd::Zero(rows_, cols_);
    C     = MatrixXd::Zero(cols_, cols_);

    for (int i = 0; i < rows_; ++i) {
        for (int j = 0; j < cols_; ++j) {
            X(i, j) = source[i][j];
        }
    }
}

void PCA::compute()
{
    // 定义特征向量矩阵vec，特征值val
    MatrixXd vec, val;

    // 计算每一维度均值
    MatrixXd meanval       = X.colwise().mean();
    RowVectorXd meanvecRow = meanval;

    // 零均值化
    featurenormalize(X);
    //    std::cout << X << std::endl;

    // 计算协方差
    computeCov(X, C);
    //    std::cout << C << std::endl;

    // 计算特征值和特征向量
    computeEig(C, vec, val);

    // 从小到大
    //    std::cout << vec << std::endl;
    //    std::cout << val << std::endl;

    // 计算损失率，确定降低维数（原本的n维向量即降低到dim维向量）
    int k = computeDim(val, variance_remain_);
    //    qDebug() << "计算出压缩后的维度为：" << k;

    contribution_rate_ = val / val.sum();

    // 定义投影矩阵V
    MatrixXd V(C.cols(), k);
    // 投影矩阵的值为特征向量矩阵的前k维
    V = vec.rightCols(k);
    std::cout << "qian k wei: " << V << std::endl;

    MatrixXd normalRate(k, contribution_rate_.cols());
    normalRate = contribution_rate_.bottomRows(k);
    // 归一化
    normalRate = normalRate / normalRate.sum();

    scores_  = MatrixXd::Zero(normalRate.rows(), 1);
    scores_  = V * normalRate;
    weights_ = MatrixXd::Zero(normalRate.rows(), 1);
    weights_ = scores_ / scores_.sum();
    //    std::cout << "scores_" << scores_ << std::endl;
    //    std::cout << "weights_" << weights_ << std::endl;

#if 0
    // 计算结果:把原始数据进行投影，每个数据点m维向量投影成k维向量
    MatrixXd res = X * V;

    // 还原的中心化矩阵Y
    MatrixXd Y(rows_, cols_);
    Y = res * V.transpose();

    // 样本每个维度都加上均值就是解压缩样本
    Y.rowwise() += meanvecRow;

#endif
}

QVector<double> PCA::weights() const
{
    QVector<double> res;
    for (int r = 0; r < weights_.rows(); ++r) {
        res << weights_(r, 0);
    }

    return res;
}

QVector<double> PCA::scores() const
{
    QVector<double> res;
    for (int r = 0; r < scores_.rows(); ++r) {
        res << scores_(r, 0);
    }

    return res;
}