#include "PCA.h" #include #include using namespace Eigen; /** // QVector> pdata = { // { 4.1, 0.04, 0.19, 0.01, 0.36, 0.05 }, { 3.9, 0.08, 0.38, 0.18, 0.19, 0.04 }, // { 5.9, 0.07, 0.23, 0.04, 0.36, 0.07 }, { 4.9, 0.23, 1.03, 0.3, 1.4, 0.09 }, // { 4, 0.14, 0.57, 0.11, 1.12, 0.05 }, { 6.1, 0.23, 1.19, 0.47, 0.93, 0.07 }, // { 8.2, 0.21, 0.88, 0.2, 0.97, 0.1 }, { 6.5, 0.13, 0.8, 0.26, 0.52, 0.09 }, // { 7.5, 0.16, 1.28, 0.88, 0.8, 0.12 }, { 6.8, 0.04, 0.17, 0.02, 0.24, 0.11 }, // { 7.4, 0.05, 0.33, 0.06, 0.39, 0.1 }, { 5.7, 0.08, 0.6, 0.47, 0.46, 0.08 }, // { 2.4, 0.14, 0.23, 0.04, 0.94, 0.02 }, { 2.2, 0.24, 0.35, 0.06, 1.78, 0.02 }, // { 1.9, 0.11, 0.3, 0.1, 0.5, 0.04 }, { 1.8, 0.06, 0.28, 0.1, 0.29, 0.03 }, // { 15.9, 0.2, 1.05, 0.21, 1.84, 0.12 }, { 18.9, 0.09, 0.82, 0.34, 0.58, 0.36 }, // { 18.8, 0.12, 0.8, 0.39, 0.76, 0.26 }, { 16.8, 0.27, 1.74, 1.51, 1.78, 0.32 }, // }; QVector> pdata = { { 66, 64, 65, 65, 65 }, { 65, 63, 63, 65, 64 }, { 57, 58, 63, 59, 66 }, { 67, 69, 65, 68, 64 }, { 61, 61, 62, 62, 63 }, { 64, 65, 63, 63, 63 }, { 64, 63, 63, 63, 64 }, { 63, 63, 63, 63, 63 }, { 65, 64, 65, 66, 64 }, { 67, 69, 69, 68, 67 }, { 62, 63, 65, 64, 64 }, { 68, 67, 65, 67, 65 }, { 65, 65, 66, 65, 64 }, { 62, 63, 64, 62, 66 }, { 64, 66, 66, 65, 67 }, }; PCA pca(pdata, 0.0); pca.compute(); */ // 把输入的数据矩阵中心化并返回 static void featurenormalize(MatrixXd &X) { // 计算每一维度均值 VectorXd meanval = X.colwise().mean(); // 计算每一标准差 VectorXd stdval = ((X.rowwise() - meanval.transpose()).array().square().colwise().sum() / (X.rows() - 1)).sqrt(); // 数据进行标准化 X = (X.rowwise() - meanval.transpose()).array().rowwise() / stdval.transpose().array(); } // 计算数据矩阵X 的协方差矩阵C static void computeCov(MatrixXd &X, MatrixXd &C) { // 计算协方差矩阵C = X^T * X / (n-1); C = X.adjoint() * X; C = C.array() / (X.rows() - 1); } // 计算协方差矩阵C的特征值val和特征向量vec static void computeEig(MatrixXd &C, MatrixXd &vec, MatrixXd &val) { // 计算特征值和特征向量，使用selfadjont按照对阵矩阵的算法去计算，可以让产生的vec和val按照有序排列 SelfAdjointEigenSolver eig(C); vec = eig.eigenvectors(); val = eig.eigenvalues(); } // 计算压缩成多少维数据损失小于thd static int computeDim(MatrixXd &val, double thd) { int dim; double sum = 0; for (int i = val.rows() - 1; i >= 0; --i) { sum += val(i, 0); dim = i; if (sum / val.sum() >= thd) break; } return val.rows() - dim; } PCA::PCA(const QVector> &source, double thd) : variance_remain_(thd) { rows_ = source.size(); // 行数 cols_ = source.at(0).size(); // 列数 X = MatrixXd::Zero(rows_, cols_); C = MatrixXd::Zero(cols_, cols_); for (int i = 0; i < rows_; ++i) { for (int j = 0; j < cols_; ++j) { X(i, j) = source[i][j]; } } } void PCA::compute() { // 定义特征向量矩阵vec，特征值val MatrixXd vec, val; // 计算每一维度均值 MatrixXd meanval = X.colwise().mean(); RowVectorXd meanvecRow = meanval; // 零均值化 featurenormalize(X); // std::cout << X << std::endl; // 计算协方差 computeCov(X, C); // std::cout << C << std::endl; // 计算特征值和特征向量 computeEig(C, vec, val); // 从小到大 // std::cout << vec << std::endl; // std::cout << val << std::endl; // 计算损失率，确定降低维数（原本的n维向量即降低到dim维向量） int k = computeDim(val, variance_remain_); // qDebug() << "计算出压缩后的维度为：" << k; contribution_rate_ = val / val.sum(); // 定义投影矩阵V MatrixXd V(C.cols(), k); // 投影矩阵的值为特征向量矩阵的前k维 V = vec.rightCols(k); std::cout << "qian k wei: " << V << std::endl; MatrixXd normalRate(k, contribution_rate_.cols()); normalRate = contribution_rate_.bottomRows(k); // 归一化 normalRate = normalRate / normalRate.sum(); scores_ = MatrixXd::Zero(normalRate.rows(), 1); scores_ = V * normalRate; weights_ = MatrixXd::Zero(normalRate.rows(), 1); weights_ = scores_ / scores_.sum(); // std::cout << "scores_" << scores_ << std::endl; // std::cout << "weights_" << weights_ << std::endl; #if 0 // 计算结果:把原始数据进行投影，每个数据点m维向量投影成k维向量 MatrixXd res = X * V; // 还原的中心化矩阵Y MatrixXd Y(rows_, cols_); Y = res * V.transpose(); // 样本每个维度都加上均值就是解压缩样本 Y.rowwise() += meanvecRow; #endif } QVector PCA::weights() const { QVector res; for (int r = 0; r < weights_.rows(); ++r) { res << weights_(r, 0); } return res; } QVector PCA::scores() const { QVector res; for (int r = 0; r < scores_.rows(); ++r) { res << scores_(r, 0); } return res; }