使用 householdler 演算, 可以將任意矩陣分解成 Q R 兩矩陣相乘. 原理是先從矩陣擷取出行向量, 算出單位向量 ûₖ, 再計算對應的屋頂保持矩陣(Householder matrix) Hₖ, 該矩陣的特性是: 1. 兩個矩陣相乘會變成單位矩陣 HₖHₖ = I, 也就是說原矩陣就是反矩陣 Hₖ⁻ = Hₖ, 就好像是一面鏡子可分拆兩邊的特性. 2. 當一個矩陣左邊乘上 H 後, 行向量所在矩陣, 正對角以下元素全部清為 0, 矩陣屋頂(正對角)上的元素則保持不受影響, 也就是說乘一次 H 就清一行 0, 要注意的是 Hₖ 會隨著矩陣的更動而變化, 根據上述兩特性, 任一矩陣 A 就能分解成 Q 及 R 矩陣(A = QR):
A = IA = I(HₖHₖ)A
= I(H₁H₁)A = (IH₁) * (H₁A)
= (IH₁)(H₂H₂)(H₁A) = (IH₁H₂) * (H₂H₁A)
= (IH₁H₂)(H₃H₃)(H₂H₁A = (IH₁H₂H₃) * (H₃H₂H₁A)
= ...
= (IH₁H₂H₃ ... Hₙ) * (Hₙ ... H₃H₂H₁A)
右式將矩陣 A 不斷的左乘 H, 隨著對角線以下, 一行一行清為零, 自然就變成上三角矩陣 R(又稱右矩陣) , 左式則將單位矩陣 I 不斷右乘 H 的結果, 最後演變成 Q 矩陣(它是正交矩陣, 因為 H 本身就是正交矩陣)
Hₖ = I - 2*ûₖ*ûₖᵗ , 其中 ûₖ 為 Householder 單位向量
R₁ = H₁A = (I - 2*û₁*û₁ᵗ)*A = A - 2*û₁*û₁ᵗ*A
Q₁ = IH₁ = I(I - 2*Q*û₁*û₁ᵗ) = I - 2*û₁*û₁ᵗ
Rₖ <= HₖRₖ₋₁ = (I - 2*ûₖ*ûₖᵗ)*Rₖ₋₁ = Rₖ₋₁ - 2*ûₖ*ûₖᵗ*Rₖ₋₁
Qₖ <= Qₖ₋₁Hₖ = Qₖ₋₁(I - 2*Q*ûₖ*ûₖᵗ) = Qₖ₋₁ - 2*Qₖ₋₁*ûₖ*ûₖᵗ
隨著 uₖ => Hₖ => Rₖ₊₁ => uₖ₊₁ => Hₖ₊₁ 不斷推陳出新, 利用疊代法就能算出最終的 Q, R:
R -= 2*ûₖ*ûₖᵗ*R
Q -= 2*Q*ûₖ*ûₖᵗ
#include <stdio.h>
#include <math.h>
#include <vector>
#define tol ((double)1e-12)
using namespace std;
using F_vector = vector<double>;
using F_matrix = vector<F_vector>;
void vectorShow(F_vector v) {
for (int i = 0; i < v.size(); i ++) { printf("%-10.4f \t", v[i]); }
printf("\n");
}
void matrixShow(F_matrix matrix) {
for (int j = 0; j < matrix.size(); j ++) {
printf("<%-2d:>\t", j);
vectorShow(matrix[j]);
}
printf("\n");
}
F_matrix transpose(F_matrix &matrix) { // 矩陣轉置
int row = matrix.size();
int col = matrix[0].size();
F_matrix temp(col, F_vector(row));
for (int i = 0; i < row; i++) {
for (int j = 0; j < col; j ++) {
temp[j][i] = matrix[i][j];
}
}
return temp;
}
F_matrix eye(int n) {// 單位矩陣
F_matrix temp(n, F_vector(n));
for (int i = 0; i < n; i ++) {
for (int j = 0; j < n; j++) { temp[i][j] = (i == j);}
}
return temp;
}
F_matrix product(F_matrix q, F_matrix r) { // 矩陣相乘
int dim = q[0].size();
if (dim != r.size()) return F_matrix(0);
int m = q.size();
int n = r[0].size();
F_matrix x(m, F_vector(n));
for (int i = 0; i < m; i ++) {
for (int j = 0; j < n; j ++) {
double sum = 0;
for (int k = 0; k < dim; k++) {
sum += q[i][k] * r[k][j];
}
x[i][j] = sum;
}
}
return x;
}
F_vector householdvector(F_matrix &R, int i) {
int m = R.size();
int mi = m - i;
F_vector ut(mi);
ut[0] = R[i][i];
double s = 0;
for (int jk = 1, j = i + 1; jk < mi ; jk ++) {
double r = R[j ++][i];
s += r * r;
ut[jk] = r;
}
ut[0] += sqrt(s + ut[0]*ut[0]);
s = sqrt(s + ut[0]*ut[0]);
if (s < tol) return F_vector(0);
for (int jk = 0; jk < mi ; jk ++) {
ut[jk] /= s;
}
return ut;
}
int householdQRD(F_matrix &xx, F_matrix &qq, F_matrix &rr) { // xx => QR
int rank = 0;
int m = xx.size();
int n = xx[0].size();
int maxSize = m;
if (maxSize < n) maxSize = n;
F_matrix Q = eye(m);
F_matrix R = xx;
F_matrix tempH(maxSize, F_vector(maxSize));
F_vector uu(maxSize);
for (int i = 0; i < n ; i ++) { // n columns
F_vector ut = householdvector(R, i);
int mi = ut.size();
if (mi == 0) break;
rank ++;
for (int k = i, ki = 0; k < n; k ++, ki ++) {
double sum = 0;
for (int kj = 0, j = i; kj < mi; kj ++, j++) {
sum += ut[kj] * R[j][k]; // uᵗ•Rj
}
uu[ki] = sum + sum; // w = 2uᵗ*R, which is n elements of row vector
}
for (int ki = 0; ki < n - i; ki ++) {
for (int kj = 0; kj < mi; kj ++) {
tempH[kj][ki] = ut[kj] * uu[ki];// tempH = u*w = 2u*(uᵗ*R), which is a miXni matrix
}
}
for (int k = i, ki = 0; k < n; k ++, ki ++) {
for(int j = i, kj = 0; j < m; j ++, kj ++) {
R[j][k] -= tempH[kj][ki]; // R -= tempH = 2u*(uᵗ*R)
}
}
for (int j = 0; j < m; j ++) {
double sum = 0;
for (int ki = 0, k = i; ki < mi; ki ++) {
sum += Q[j][k ++] * ut[ki]; // Qj•u
}
uu[j] = sum + sum; // v = 2Q*u; It is a column vector with m elements now.
}
for (int j = 0; j < m ; j ++) {
for (int ki = 0; ki < mi ; ki ++) {
tempH[j][ki] = uu[j] * ut[ki];// tempH = v*uᵗ = 2Q*u*uᵗ; which is a mXm matrix
}
}
for(int j = 0; j < m ; j++) {
for (int ki = 0, k = i; ki < mi; ki ++, k ++) {
Q[j][k] -= tempH[j][ki]; // Q -= tempH = (2Q*u)*uᵗ
}
}
}
swap(Q, qq);
swap(R, rr);
return rank;
}
F_matrix arrayclone(double *array, int w, int h) {
F_matrix temp(h, F_vector(w));
for (int i = 0; i < h; i++) {
for (int j = 0; j < w; j++) { temp[i][j]= *array ++; }
}
return temp;
}
int main(int argc, char **argv) {
double a[] = {
4, 2, 3, 4,
17, 8, 9, 13
};
F_matrix B = arrayclone(a, 4, 2);
F_matrix Q, R;
matrixShow(B);
householdQRD(B, Q, R);
matrixShow(Q);
matrixShow(R);
matrixShow(product(Q, R));
F_matrix C = transpose(B);
matrixShow(C);
householdQRD(C, Q, R);
matrixShow(Q);
matrixShow(R);
matrixShow(product(Q, R));
}
沒有留言:
張貼留言