From 806fe41ad5e9ad7b4582804c023ff538427cd42c Mon Sep 17 00:00:00 2001 From: Stefan Jahn Date: Sun, 24 Oct 2021 10:57:24 +0200 Subject: [PATCH 1/2] =?UTF-8?q?Geschwindigkeitsoptimierung=20f=C3=BCr=20di?= =?UTF-8?q?e=20Kreuzkorrelation,=20MMX=20ist=20doch=20nicht=20so=20vorteil?= =?UTF-8?q?haft...=20seltsam=3F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- detect.cc | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/detect.cc b/detect.cc index 40984d4..8628718 100644 --- a/detect.cc +++ b/detect.cc @@ -1,5 +1,7 @@ +#include +#include #include #include #include "config.h" @@ -240,6 +242,14 @@ void Detect::InputDetect(int *posx, int *posy) { #endif } +inline float calc_vector(uint8_t a1, uint8_t a2, uint8_t b1, uint8_t b2, uint8_t c1, uint8_t c2, uint8_t d1, uint8_t d2) { + return (float) (a1 * a2 + b1 * b2 + c1 * c2 + d1 * d2); + // __m64 m1 = _mm_set_pi16 (a1, b1, c1, d1 ); + // __m64 m2 = _mm_set_pi16 (a2, b2, c2, d2 ); + // __m64 m3 = _mm_madd_pi16 (m1, m2); + // uint32_t *data = (uint32_t *) &m3; + // return (float) (data[0] + data[1]); +} #define OBJSIZE 50 #define MAXSHIFT 20 @@ -304,7 +314,12 @@ void Detect::InputDetectCrossC(int *posx, int *posy) { for (x = 0; x < OBJSIZE; x++, oldi += 3, ini += 3, oldx++, inx++) { if (oldx >= 0 && oldy >= 0 && oldx < oldFrame.w && oldy <= oldFrame.h && inx >= 0 && inx < inFrame.w && iny >= 0 && iny < inFrame.h) { - f += (float)(pxo[oldi+0])*(float)(pxi[ini+0]); + //f += (float)(pxo[oldi+0])*(float)(pxi[ini+0]); + f += calc_vector( pxo[oldi+0], pxi[ini+0], + pxo[oldi+3], pxi[ini+3], + pxo[oldi+6], pxi[ini+6], + pxo[oldi+9], pxi[ini+9]); + x+=3; oldi += 9; ini += 9; oldx+=3; inx+=3; } } } From 96b281e77ef102e032d60e5a71ed431fd1cff18d Mon Sep 17 00:00:00 2001 From: Stefan Jahn Date: Sun, 24 Oct 2021 11:32:39 +0200 Subject: [PATCH 2/2] MMX Code repariert, jetzt gibt es auch keine overflows mehr... --- detect.cc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/detect.cc b/detect.cc index 8628718..5b4d909 100644 --- a/detect.cc +++ b/detect.cc @@ -243,12 +243,13 @@ void Detect::InputDetect(int *posx, int *posy) { } inline float calc_vector(uint8_t a1, uint8_t a2, uint8_t b1, uint8_t b2, uint8_t c1, uint8_t c2, uint8_t d1, uint8_t d2) { - return (float) (a1 * a2 + b1 * b2 + c1 * c2 + d1 * d2); - // __m64 m1 = _mm_set_pi16 (a1, b1, c1, d1 ); - // __m64 m2 = _mm_set_pi16 (a2, b2, c2, d2 ); + return a1 * a2 + b1 * b2 + c1 * c2 + d1 * d2; + // __m64 m1 = _mm_set_pi16 (a1, b1, c1, d1); + // __m64 m2 = _mm_set_pi16 (a2, b2, c2, d2); // __m64 m3 = _mm_madd_pi16 (m1, m2); + // _mm_empty (); // uint32_t *data = (uint32_t *) &m3; - // return (float) (data[0] + data[1]); + // return data[0] + data[1]; } #define OBJSIZE 50