705 lines
16 KiB
C
705 lines
16 KiB
C
/*
|
|
* Copyright (C) 2016 The Android Open Source Project
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#include "common/math/mat.h"
|
|
|
|
#include <float.h>
|
|
|
|
#include "chre/util/nanoapp/assert.h"
|
|
|
|
#ifdef _OS_BUILD_
|
|
#include <nanohub_math.h>
|
|
#include <seos.h>
|
|
#else
|
|
#include <math.h>
|
|
#ifndef UNROLLED
|
|
#define UNROLLED
|
|
#endif
|
|
#endif // _OS_BUILD_
|
|
|
|
#include <stddef.h>
|
|
#include <string.h>
|
|
|
|
#define EPSILON 1E-5f
|
|
#define CHOLESKY_TOLERANCE 1E-6f
|
|
|
|
// Forward declarations.
|
|
static void mat33SwapRows(struct Mat33 *A, uint32_t i, uint32_t j);
|
|
static uint32_t mat33Maxind(const struct Mat33 *A, uint32_t k);
|
|
static void mat33Rotate(struct Mat33 *A, float c, float s, uint32_t k,
|
|
uint32_t l, uint32_t i, uint32_t j);
|
|
|
|
static void mat44SwapRows(struct Mat44 *A, uint32_t i, uint32_t j);
|
|
|
|
void initZeroMatrix(struct Mat33 *A) {
|
|
CHRE_ASSERT_NOT_NULL(A);
|
|
memset(A->elem, 0.0f, sizeof(A->elem));
|
|
}
|
|
|
|
UNROLLED
|
|
void initDiagonalMatrix(struct Mat33 *A, float x) {
|
|
CHRE_ASSERT_NOT_NULL(A);
|
|
initZeroMatrix(A);
|
|
|
|
uint32_t i;
|
|
for (i = 0; i < 3; ++i) {
|
|
A->elem[i][i] = x;
|
|
}
|
|
}
|
|
|
|
void initMatrixColumns(struct Mat33 *A, const struct Vec3 *v1,
|
|
const struct Vec3 *v2, const struct Vec3 *v3) {
|
|
CHRE_ASSERT_NOT_NULL(A);
|
|
CHRE_ASSERT_NOT_NULL(v1);
|
|
CHRE_ASSERT_NOT_NULL(v2);
|
|
CHRE_ASSERT_NOT_NULL(v3);
|
|
A->elem[0][0] = v1->x;
|
|
A->elem[0][1] = v2->x;
|
|
A->elem[0][2] = v3->x;
|
|
|
|
A->elem[1][0] = v1->y;
|
|
A->elem[1][1] = v2->y;
|
|
A->elem[1][2] = v3->y;
|
|
|
|
A->elem[2][0] = v1->z;
|
|
A->elem[2][1] = v2->z;
|
|
A->elem[2][2] = v3->z;
|
|
}
|
|
|
|
void mat33Apply(struct Vec3 *out, const struct Mat33 *A, const struct Vec3 *v) {
|
|
CHRE_ASSERT_NOT_NULL(out);
|
|
CHRE_ASSERT_NOT_NULL(A);
|
|
CHRE_ASSERT_NOT_NULL(v);
|
|
out->x = A->elem[0][0] * v->x + A->elem[0][1] * v->y + A->elem[0][2] * v->z;
|
|
out->y = A->elem[1][0] * v->x + A->elem[1][1] * v->y + A->elem[1][2] * v->z;
|
|
out->z = A->elem[2][0] * v->x + A->elem[2][1] * v->y + A->elem[2][2] * v->z;
|
|
}
|
|
|
|
UNROLLED
|
|
void mat33Multiply(struct Mat33 *out, const struct Mat33 *A,
|
|
const struct Mat33 *B) {
|
|
CHRE_ASSERT_NOT_NULL(out);
|
|
CHRE_ASSERT_NOT_NULL(A);
|
|
CHRE_ASSERT_NOT_NULL(B);
|
|
CHRE_ASSERT(out != A);
|
|
CHRE_ASSERT(out != B);
|
|
|
|
uint32_t i;
|
|
for (i = 0; i < 3; ++i) {
|
|
uint32_t j;
|
|
for (j = 0; j < 3; ++j) {
|
|
uint32_t k;
|
|
float sum = 0.0f;
|
|
for (k = 0; k < 3; ++k) {
|
|
sum += A->elem[i][k] * B->elem[k][j];
|
|
}
|
|
|
|
out->elem[i][j] = sum;
|
|
}
|
|
}
|
|
}
|
|
|
|
UNROLLED
|
|
void mat33ScalarMul(struct Mat33 *A, float c) {
|
|
CHRE_ASSERT_NOT_NULL(A);
|
|
uint32_t i;
|
|
for (i = 0; i < 3; ++i) {
|
|
uint32_t j;
|
|
for (j = 0; j < 3; ++j) {
|
|
A->elem[i][j] *= c;
|
|
}
|
|
}
|
|
}
|
|
|
|
UNROLLED
|
|
void mat33Add(struct Mat33 *out, const struct Mat33 *A) {
|
|
CHRE_ASSERT_NOT_NULL(out);
|
|
CHRE_ASSERT_NOT_NULL(A);
|
|
uint32_t i;
|
|
for (i = 0; i < 3; ++i) {
|
|
uint32_t j;
|
|
for (j = 0; j < 3; ++j) {
|
|
out->elem[i][j] += A->elem[i][j];
|
|
}
|
|
}
|
|
}
|
|
|
|
UNROLLED
|
|
void mat33Sub(struct Mat33 *out, const struct Mat33 *A) {
|
|
CHRE_ASSERT_NOT_NULL(out);
|
|
CHRE_ASSERT_NOT_NULL(A);
|
|
uint32_t i;
|
|
for (i = 0; i < 3; ++i) {
|
|
uint32_t j;
|
|
for (j = 0; j < 3; ++j) {
|
|
out->elem[i][j] -= A->elem[i][j];
|
|
}
|
|
}
|
|
}
|
|
|
|
UNROLLED
|
|
int mat33IsPositiveSemidefinite(const struct Mat33 *A, float tolerance) {
|
|
CHRE_ASSERT_NOT_NULL(A);
|
|
uint32_t i;
|
|
for (i = 0; i < 3; ++i) {
|
|
if (A->elem[i][i] < 0.0f) {
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < 3; ++i) {
|
|
uint32_t j;
|
|
for (j = i + 1; j < 3; ++j) {
|
|
if (fabsf(A->elem[i][j] - A->elem[j][i]) > tolerance) {
|
|
return 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
UNROLLED
|
|
void mat33MultiplyTransposed(struct Mat33 *out, const struct Mat33 *A,
|
|
const struct Mat33 *B) {
|
|
CHRE_ASSERT(out != A);
|
|
CHRE_ASSERT(out != B);
|
|
CHRE_ASSERT_NOT_NULL(out);
|
|
CHRE_ASSERT_NOT_NULL(A);
|
|
CHRE_ASSERT_NOT_NULL(B);
|
|
uint32_t i;
|
|
for (i = 0; i < 3; ++i) {
|
|
uint32_t j;
|
|
for (j = 0; j < 3; ++j) {
|
|
uint32_t k;
|
|
float sum = 0.0f;
|
|
for (k = 0; k < 3; ++k) {
|
|
sum += A->elem[k][i] * B->elem[k][j];
|
|
}
|
|
|
|
out->elem[i][j] = sum;
|
|
}
|
|
}
|
|
}
|
|
|
|
UNROLLED
|
|
void mat33MultiplyTransposed2(struct Mat33 *out, const struct Mat33 *A,
|
|
const struct Mat33 *B) {
|
|
CHRE_ASSERT(out != A);
|
|
CHRE_ASSERT(out != B);
|
|
CHRE_ASSERT_NOT_NULL(out);
|
|
CHRE_ASSERT_NOT_NULL(A);
|
|
CHRE_ASSERT_NOT_NULL(B);
|
|
uint32_t i;
|
|
for (i = 0; i < 3; ++i) {
|
|
uint32_t j;
|
|
for (j = 0; j < 3; ++j) {
|
|
uint32_t k;
|
|
float sum = 0.0f;
|
|
for (k = 0; k < 3; ++k) {
|
|
sum += A->elem[i][k] * B->elem[j][k];
|
|
}
|
|
|
|
out->elem[i][j] = sum;
|
|
}
|
|
}
|
|
}
|
|
|
|
UNROLLED
|
|
void mat33Invert(struct Mat33 *out, const struct Mat33 *A) {
|
|
CHRE_ASSERT_NOT_NULL(out);
|
|
CHRE_ASSERT_NOT_NULL(A);
|
|
float t;
|
|
initDiagonalMatrix(out, 1.0f);
|
|
|
|
struct Mat33 tmp = *A;
|
|
|
|
uint32_t i, k;
|
|
for (i = 0; i < 3; ++i) {
|
|
uint32_t swap = i;
|
|
uint32_t j;
|
|
for (j = i + 1; j < 3; ++j) {
|
|
if (fabsf(tmp.elem[j][i]) > fabsf(tmp.elem[i][i])) {
|
|
swap = j;
|
|
}
|
|
}
|
|
|
|
if (swap != i) {
|
|
for (k = 0; k < 3; ++k) {
|
|
t = tmp.elem[i][k];
|
|
tmp.elem[i][k] = tmp.elem[swap][k];
|
|
tmp.elem[swap][k] = t;
|
|
|
|
t = out->elem[i][k];
|
|
out->elem[i][k] = out->elem[swap][k];
|
|
out->elem[swap][k] = t;
|
|
}
|
|
}
|
|
// divide by zero guard.
|
|
CHRE_ASSERT(fabsf(tmp.elem[i][i]) > 0);
|
|
if(!(fabsf(tmp.elem[i][i]) > 0)) {
|
|
return;
|
|
}
|
|
t = 1.0f / tmp.elem[i][i];
|
|
for (k = 0; k < 3; ++k) {
|
|
tmp.elem[i][k] *= t;
|
|
out->elem[i][k] *= t;
|
|
}
|
|
|
|
for (j = 0; j < 3; ++j) {
|
|
if (j != i) {
|
|
t = tmp.elem[j][i];
|
|
for (k = 0; k < 3; ++k) {
|
|
tmp.elem[j][k] -= tmp.elem[i][k] * t;
|
|
out->elem[j][k] -= out->elem[i][k] * t;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
UNROLLED
|
|
void mat33Transpose(struct Mat33 *out, const struct Mat33 *A) {
|
|
CHRE_ASSERT_NOT_NULL(out);
|
|
CHRE_ASSERT_NOT_NULL(A);
|
|
uint32_t i;
|
|
for (i = 0; i < 3; ++i) {
|
|
uint32_t j;
|
|
for (j = 0; j < 3; ++j) {
|
|
out->elem[i][j] = A->elem[j][i];
|
|
}
|
|
}
|
|
}
|
|
|
|
UNROLLED
|
|
void mat33SwapRows(struct Mat33 *A, const uint32_t i, const uint32_t j) {
|
|
CHRE_ASSERT_NOT_NULL(A);
|
|
const uint32_t N = 3;
|
|
uint32_t k;
|
|
|
|
if (i == j) {
|
|
return;
|
|
}
|
|
|
|
for (k = 0; k < N; ++k) {
|
|
float tmp = A->elem[i][k];
|
|
A->elem[i][k] = A->elem[j][k];
|
|
A->elem[j][k] = tmp;
|
|
}
|
|
}
|
|
|
|
UNROLLED
|
|
void mat33GetEigenbasis(struct Mat33 *S, struct Vec3 *eigenvals,
|
|
struct Mat33 *eigenvecs) {
|
|
CHRE_ASSERT_NOT_NULL(S);
|
|
CHRE_ASSERT_NOT_NULL(eigenvals);
|
|
CHRE_ASSERT_NOT_NULL(eigenvecs);
|
|
const uint32_t N = 3;
|
|
uint32_t i, j, k, l, m;
|
|
|
|
float _eigenvals[N];
|
|
|
|
uint32_t ind[N];
|
|
for (k = 0; k < N; ++k) {
|
|
ind[k] = mat33Maxind(S, k);
|
|
_eigenvals[k] = S->elem[k][k];
|
|
}
|
|
|
|
initDiagonalMatrix(eigenvecs, 1.0f);
|
|
|
|
for (;;) {
|
|
m = 0;
|
|
for (k = 1; k + 1 < N; ++k) {
|
|
if (fabsf(S->elem[k][ind[k]]) > fabsf(S->elem[m][ind[m]])) {
|
|
m = k;
|
|
}
|
|
}
|
|
|
|
k = m;
|
|
l = ind[m];
|
|
float p = S->elem[k][l];
|
|
|
|
if (fabsf(p) < EPSILON) {
|
|
break;
|
|
}
|
|
|
|
float y = (_eigenvals[l] - _eigenvals[k]) * 0.5f;
|
|
|
|
float t = fabsf(y) + sqrtf(p * p + y * y);
|
|
float s = sqrtf(p * p + t * t);
|
|
float c = t / s;
|
|
s = p / s;
|
|
t = p * p / t;
|
|
|
|
if (y < 0.0f) {
|
|
s = -s;
|
|
t = -t;
|
|
}
|
|
|
|
S->elem[k][l] = 0.0f;
|
|
|
|
_eigenvals[k] -= t;
|
|
_eigenvals[l] += t;
|
|
|
|
for (i = 0; i < k; ++i) {
|
|
mat33Rotate(S, c, s, i, k, i, l);
|
|
}
|
|
|
|
for (i = k + 1; i < l; ++i) {
|
|
mat33Rotate(S, c, s, k, i, i, l);
|
|
}
|
|
|
|
for (i = l + 1; i < N; ++i) {
|
|
mat33Rotate(S, c, s, k, i, l, i);
|
|
}
|
|
|
|
for (i = 0; i < N; ++i) {
|
|
float tmp = c * eigenvecs->elem[k][i] - s * eigenvecs->elem[l][i];
|
|
eigenvecs->elem[l][i] =
|
|
s * eigenvecs->elem[k][i] + c * eigenvecs->elem[l][i];
|
|
eigenvecs->elem[k][i] = tmp;
|
|
}
|
|
|
|
ind[k] = mat33Maxind(S, k);
|
|
ind[l] = mat33Maxind(S, l);
|
|
|
|
float sum = 0.0f;
|
|
for (i = 0; i < N; ++i) {
|
|
for (j = i + 1; j < N; ++j) {
|
|
sum += fabsf(S->elem[i][j]);
|
|
}
|
|
}
|
|
|
|
if (sum < EPSILON) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
for (k = 0; k < N; ++k) {
|
|
m = k;
|
|
for (l = k + 1; l < N; ++l) {
|
|
if (_eigenvals[l] > _eigenvals[m]) {
|
|
m = l;
|
|
}
|
|
}
|
|
|
|
if (k != m) {
|
|
float tmp = _eigenvals[k];
|
|
_eigenvals[k] = _eigenvals[m];
|
|
_eigenvals[m] = tmp;
|
|
|
|
mat33SwapRows(eigenvecs, k, m);
|
|
}
|
|
}
|
|
|
|
initVec3(eigenvals, _eigenvals[0], _eigenvals[1], _eigenvals[2]);
|
|
}
|
|
|
|
float mat33Determinant(const struct Mat33 *A) {
|
|
CHRE_ASSERT_NOT_NULL(A);
|
|
return A->elem[0][0] *
|
|
(A->elem[1][1] * A->elem[2][2] - A->elem[1][2] * A->elem[2][1])
|
|
- A->elem[0][1] *
|
|
(A->elem[1][0] * A->elem[2][2] - A->elem[1][2] * A->elem[2][0])
|
|
+ A->elem[0][2] *
|
|
(A->elem[1][0] * A->elem[2][1] - A->elem[1][1] * A->elem[2][0]);
|
|
}
|
|
|
|
// index of largest off-diagonal element in row k
|
|
UNROLLED
|
|
uint32_t mat33Maxind(const struct Mat33 *A, uint32_t k) {
|
|
CHRE_ASSERT_NOT_NULL(A);
|
|
const uint32_t N = 3;
|
|
|
|
uint32_t m = k + 1;
|
|
uint32_t i;
|
|
|
|
for (i = k + 2; i < N; ++i) {
|
|
if (fabsf(A->elem[k][i]) > fabsf(A->elem[k][m])) {
|
|
m = i;
|
|
}
|
|
}
|
|
|
|
return m;
|
|
}
|
|
|
|
void mat33Rotate(struct Mat33 *A, float c, float s, uint32_t k, uint32_t l,
|
|
uint32_t i, uint32_t j) {
|
|
CHRE_ASSERT_NOT_NULL(A);
|
|
float tmp = c * A->elem[k][l] - s * A->elem[i][j];
|
|
A->elem[i][j] = s * A->elem[k][l] + c * A->elem[i][j];
|
|
A->elem[k][l] = tmp;
|
|
}
|
|
|
|
void mat44Apply(struct Vec4 *out, const struct Mat44 *A, const struct Vec4 *v) {
|
|
CHRE_ASSERT_NOT_NULL(out);
|
|
CHRE_ASSERT_NOT_NULL(A);
|
|
CHRE_ASSERT_NOT_NULL(v);
|
|
|
|
out->x = A->elem[0][0] * v->x + A->elem[0][1] * v->y + A->elem[0][2] * v->z +
|
|
A->elem[0][3] * v->w;
|
|
|
|
out->y = A->elem[1][0] * v->x + A->elem[1][1] * v->y + A->elem[1][2] * v->z +
|
|
A->elem[1][3] * v->w;
|
|
|
|
out->z = A->elem[2][0] * v->x + A->elem[2][1] * v->y + A->elem[2][2] * v->z +
|
|
A->elem[2][3] * v->w;
|
|
|
|
out->w = A->elem[3][0] * v->x + A->elem[3][1] * v->y + A->elem[3][2] * v->z +
|
|
A->elem[3][3] * v->w;
|
|
}
|
|
|
|
UNROLLED
|
|
void mat44DecomposeLup(struct Mat44 *LU, struct Size4 *pivot) {
|
|
CHRE_ASSERT_NOT_NULL(LU);
|
|
CHRE_ASSERT_NOT_NULL(pivot);
|
|
const uint32_t N = 4;
|
|
uint32_t i, j, k;
|
|
|
|
for (k = 0; k < N; ++k) {
|
|
pivot->elem[k] = k;
|
|
float max = fabsf(LU->elem[k][k]);
|
|
for (j = k + 1; j < N; ++j) {
|
|
if (max < fabsf(LU->elem[j][k])) {
|
|
max = fabsf(LU->elem[j][k]);
|
|
pivot->elem[k] = j;
|
|
}
|
|
}
|
|
|
|
if (pivot->elem[k] != k) {
|
|
mat44SwapRows(LU, k, pivot->elem[k]);
|
|
}
|
|
|
|
if (fabsf(LU->elem[k][k]) < EPSILON) {
|
|
continue;
|
|
}
|
|
|
|
for (j = k + 1; j < N; ++j) {
|
|
LU->elem[k][j] /= LU->elem[k][k];
|
|
}
|
|
|
|
for (i = k + 1; i < N; ++i) {
|
|
for (j = k + 1; j < N; ++j) {
|
|
LU->elem[i][j] -= LU->elem[i][k] * LU->elem[k][j];
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
UNROLLED
|
|
void mat44SwapRows(struct Mat44 *A, const uint32_t i, const uint32_t j) {
|
|
CHRE_ASSERT_NOT_NULL(A);
|
|
const uint32_t N = 4;
|
|
uint32_t k;
|
|
|
|
if (i == j) {
|
|
return;
|
|
}
|
|
|
|
for (k = 0; k < N; ++k) {
|
|
float tmp = A->elem[i][k];
|
|
A->elem[i][k] = A->elem[j][k];
|
|
A->elem[j][k] = tmp;
|
|
}
|
|
}
|
|
|
|
UNROLLED
|
|
void mat44Solve(const struct Mat44 *A, struct Vec4 *x, const struct Vec4 *b,
|
|
const struct Size4 *pivot) {
|
|
CHRE_ASSERT_NOT_NULL(A);
|
|
CHRE_ASSERT_NOT_NULL(x);
|
|
CHRE_ASSERT_NOT_NULL(b);
|
|
CHRE_ASSERT_NOT_NULL(pivot);
|
|
const uint32_t N = 4;
|
|
uint32_t i, k;
|
|
|
|
float bCopy[N];
|
|
bCopy[0] = b->x;
|
|
bCopy[1] = b->y;
|
|
bCopy[2] = b->z;
|
|
bCopy[3] = b->w;
|
|
|
|
float _x[N];
|
|
for (k = 0; k < N; ++k) {
|
|
if (pivot->elem[k] != k) {
|
|
float tmp = bCopy[k];
|
|
bCopy[k] = bCopy[pivot->elem[k]];
|
|
bCopy[pivot->elem[k]] = tmp;
|
|
}
|
|
|
|
_x[k] = bCopy[k];
|
|
for (i = 0; i < k; ++i) {
|
|
_x[k] -= _x[i] * A->elem[k][i];
|
|
}
|
|
_x[k] /= A->elem[k][k];
|
|
}
|
|
|
|
for (k = N; k-- > 0;) {
|
|
for (i = k + 1; i < N; ++i) {
|
|
_x[k] -= _x[i] * A->elem[k][i];
|
|
}
|
|
}
|
|
|
|
initVec4(x, _x[0], _x[1], _x[2], _x[3]);
|
|
}
|
|
|
|
float matMaxDiagonalElement(const float *square_mat, size_t n) {
|
|
CHRE_ASSERT_NOT_NULL(square_mat);
|
|
CHRE_ASSERT(n > 0);
|
|
size_t i;
|
|
float max = square_mat[0];
|
|
const size_t n_square = n * n;
|
|
const size_t offset = n + 1;
|
|
for (i = offset; i < n_square; i += offset) {
|
|
if (square_mat[i] > max) {
|
|
max = square_mat[i];
|
|
}
|
|
}
|
|
return max;
|
|
}
|
|
|
|
void matAddConstantDiagonal(float *square_mat, float u, size_t n) {
|
|
CHRE_ASSERT_NOT_NULL(square_mat);
|
|
size_t i;
|
|
const size_t n_square = n * n;
|
|
const size_t offset = n + 1;
|
|
for (i = 0; i < n_square; i += offset) {
|
|
square_mat[i] += u;
|
|
}
|
|
}
|
|
|
|
void matTransposeMultiplyMat(float *out, const float *A,
|
|
size_t nrows, size_t ncols) {
|
|
CHRE_ASSERT_NOT_NULL(out);
|
|
CHRE_ASSERT_NOT_NULL(A);
|
|
size_t i;
|
|
size_t j;
|
|
size_t k;
|
|
memset(out, 0, sizeof(float) * ncols * ncols);
|
|
for (i = 0; i < ncols; ++i) {
|
|
for (j = 0; j < ncols; ++j) {
|
|
// Since A' * A is symmetric, can use upper diagonal elements
|
|
// to fill in the lower diagonal without recomputing.
|
|
if (j < i) {
|
|
out[i * ncols + j] = out[j * ncols + i];
|
|
} else {
|
|
// mat_out[i, j] = ai ' * aj
|
|
out[i * ncols + j] = 0;
|
|
for (k = 0; k < nrows; ++k) {
|
|
out[i * ncols + j] += A[k * ncols + i] *
|
|
A[k * ncols + j];
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void matMultiplyVec(float *out, const float *A, const float *v,
|
|
size_t nrows, size_t ncols) {
|
|
CHRE_ASSERT_NOT_NULL(out);
|
|
CHRE_ASSERT_NOT_NULL(A);
|
|
CHRE_ASSERT_NOT_NULL(v);
|
|
size_t i;
|
|
for (i = 0; i < nrows; ++i) {
|
|
const float *row = &A[i * ncols];
|
|
out[i] = vecDot(row, v, ncols);
|
|
}
|
|
}
|
|
|
|
void matTransposeMultiplyVec(float *out, const float *A, const float *v,
|
|
size_t nrows, size_t ncols) {
|
|
CHRE_ASSERT_NOT_NULL(out);
|
|
CHRE_ASSERT_NOT_NULL(A);
|
|
CHRE_ASSERT_NOT_NULL(v);
|
|
size_t i, j;
|
|
for (i = 0; i < ncols; ++i) {
|
|
out[i] = 0;
|
|
for (j = 0; j < nrows; ++j) {
|
|
out[i] += A[j * ncols + i] * v[j];
|
|
}
|
|
}
|
|
}
|
|
|
|
bool matLinearSolveCholesky(float *x, const float *L, const float *b, size_t n) {
|
|
CHRE_ASSERT_NOT_NULL(x);
|
|
CHRE_ASSERT_NOT_NULL(L);
|
|
CHRE_ASSERT_NOT_NULL(b);
|
|
CHRE_ASSERT(n <= INT32_MAX);
|
|
int32_t i, j; // Loops below require signed integers.
|
|
int32_t s_n = (int32_t)n; // Signed n.
|
|
float sum = 0.0f;
|
|
// 1. Solve Ly = b through forward substitution. Use x[] to store y.
|
|
for (i = 0; i < s_n; ++i) {
|
|
sum = 0.0f;
|
|
for (j = 0; j < i; ++j) {
|
|
sum += L[i * s_n + j] * x[j];
|
|
}
|
|
// Check for non-zero diagonals (don't divide by zero).
|
|
if (L[i * s_n + i] < EPSILON) {
|
|
return false;
|
|
}
|
|
x[i] = (b[i] - sum) / L[i * s_n + i];
|
|
}
|
|
|
|
// 2. Solve L'x = y through backwards substitution. Use x[] to store both
|
|
// y and x.
|
|
for (i = s_n - 1; i >= 0; --i) {
|
|
sum = 0.0f;
|
|
for (j = i + 1; j < s_n; ++j) {
|
|
sum += L[j * s_n + i] * x[j];
|
|
}
|
|
x[i] = (x[i] - sum) / L[i * s_n + i];
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool matCholeskyDecomposition(float *L, const float *A, size_t n) {
|
|
CHRE_ASSERT_NOT_NULL(L);
|
|
CHRE_ASSERT_NOT_NULL(A);
|
|
size_t i, j, k;
|
|
float sum = 0.0f;
|
|
// initialize L to zero.
|
|
memset(L, 0, sizeof(float) * n * n);
|
|
|
|
for (i = 0; i < n; ++i) {
|
|
// compute L[i][i] = sqrt(A[i][i] - sum_k = 1:i-1 L^2[i][k])
|
|
sum = 0.0f;
|
|
for (k = 0; k < i; ++k) {
|
|
sum += L[i * n + k] * L[i * n + k];
|
|
}
|
|
sum = A[i * n + i] - sum;
|
|
// If diagonal element of L is too small, cholesky fails.
|
|
if (sum < CHOLESKY_TOLERANCE) {
|
|
return false;
|
|
}
|
|
L[i * n + i] = sqrtf(sum);
|
|
|
|
// for j = i+1:N, compute L[j][i] =
|
|
// (1/L[i][i]) * (A[i][j] - sum_k = 1:i-1 L[i][k] * L[j][k])
|
|
for (j = i + 1; j < n; ++j) {
|
|
sum = 0.0f;
|
|
for (k = 0; k < i; ++k) {
|
|
sum += L[i * n + k] * L[j * n + k];
|
|
}
|
|
// division okay because magnitude of L[i][i] already checked above.
|
|
L[j * n + i] = (A[i * n + j] - sum) / L[i * n + i];
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|