From d1a5ae450f9ca78beaaebe2a8d4902a6bcb68515 Mon Sep 17 00:00:00 2001 From: schodet Date: Sun, 14 Nov 2004 19:16:18 +0000 Subject: Ajout de la division d'un f824 par un f824. --- n/asserv/src/dsp.c | 306 ++++++++++++++++++++++++++++++++-------------- n/asserv/src/dsp_check2.c | 70 +++++++++-- n/asserv/src/test_dsp.c | 35 ++++++ 3 files changed, 310 insertions(+), 101 deletions(-) (limited to 'n/asserv') diff --git a/n/asserv/src/dsp.c b/n/asserv/src/dsp.c index 576040b..4484b2f 100644 --- a/n/asserv/src/dsp.c +++ b/n/asserv/src/dsp.c @@ -43,19 +43,19 @@ extern inline int16_t dsp_add_sat_i16i16 (int16_t a, int16_t b) { - asm ("add %A0, %A1\n\t" - "adc %B0, %B1\n\t" + asm ("add %A0, %A1" "\n\t" + "adc %B0, %B1" "\n\t" /* Branch if not V. */ - "brvc 1f\n\t" + "brvc 1f" "\n\t" /* Load Max. */ - "ldi %A0, 0xff\n\t" - "ldi %B0, 0x7f\n\t" + "ldi %A0, 0xff" "\n\t" + "ldi %B0, 0x7f" "\n\t" /* Branch if not S. */ - "brlt 1f\n\t" + "brlt 1f" "\n\t" /* Load Min. */ - "ldi %A0, 0x00\n\t" - "ldi %B0, 0x80\n\t" - "1:" "\n\t" + "ldi %A0, 0x00" "\n\t" + "ldi %B0, 0x80" "\n\t" + "1:" "" "\n\t" : "=r" (a) : "0" (a), "r" (b)); return a; } @@ -65,17 +65,17 @@ extern inline int16_t dsp_mul_i16f88 (int16_t a, uint16_t b) { int16_t r; - asm ("mulsu %B1, %B2\n\t" - "mov %B0, r0\n\t" - "mul %A1, %A2\n\t" - "mov %A0, r1\n\t" - "mul %A1, %B2\n\t" - "add %A0, r0\n\t" - "adc %B0, r1\n\t" - "mulsu %B1, %A2\n\t" - "add %A0, r0\n\t" - "adc %B0, r1\n\t" - "clr r1\n\t" + asm ("mulsu %B1, %B2" "\n\t" + "mov %B0, r0" "\n\t" + "mul %A1, %A2" "\n\t" + "mov %A0, r1" "\n\t" + "mul %A1, %B2" "\n\t" + "add %A0, r0" "\n\t" + "adc %B0, r1" "\n\t" + "mulsu %B1, %A2" "\n\t" + "add %A0, r0" "\n\t" + "adc %B0, r1" "\n\t" + "clr r1" "\n\t" : "=&r" (r) : "a" (a), "a" (b) : "r0"); return r; } @@ -90,85 +90,207 @@ dsp_mul_f824 (int32_t a, int32_t b) int32_t r; int8_t z; asm ("" - "clr %1\n\t" + "clr %1" "\n\t" /* Low dword (>> 8, with 8 guards). */ - "mul %A2, %B3\n\t" - "movw %A0, r0\n\t" - "clr %C0\n\t" - "clr %D0\n\t" - "mul %A2, %A3\n\t" - "add %A0, r1\n\t" - "adc %B0, %1\n\t" - "adc %C0, %1\n\t" - "mul %B2, %A3\n\t" - "add %A0, r0\n\t" - "adc %B0, r1\n\t" - "adc %C0, %1\n\t" - "mul %A2, %C3\n\t" - "add %B0, r0\n\t" - "adc %C0, r1\n\t" - "adc %D0, %1\n\t" - "mul %B2, %B3\n\t" - "add %B0, r0\n\t" - "adc %C0, r1\n\t" - "adc %D0, %1\n\t" - "mul %C2, %A3\n\t" - "add %B0, r0\n\t" - "adc %C0, r1\n\t" - "adc %D0, %1\n\t" + "mul %A2, %B3" "\n\t" + "movw %A0, r0" "\n\t" + "clr %C0" "\n\t" + "clr %D0" "\n\t" + "mul %A2, %A3" "\n\t" + "add %A0, r1" "\n\t" + "adc %B0, %1" "\n\t" + "adc %C0, %1" "\n\t" + "mul %B2, %A3" "\n\t" + "add %A0, r0" "\n\t" + "adc %B0, r1" "\n\t" + "adc %C0, %1" "\n\t" + "mul %A2, %C3" "\n\t" + "add %B0, r0" "\n\t" + "adc %C0, r1" "\n\t" + "adc %D0, %1" "\n\t" + "mul %B2, %B3" "\n\t" + "add %B0, r0" "\n\t" + "adc %C0, r1" "\n\t" + "adc %D0, %1" "\n\t" + "mul %C2, %A3" "\n\t" + "add %B0, r0" "\n\t" + "adc %C0, r1" "\n\t" + "adc %D0, %1" "\n\t" /* Shift. */ - "movw %A0, %C0\n\t" + "movw %A0, %C0" "\n\t" /* Upper word. */ - "mulsu %D3, %C2\n\t" - "movw %C0, r0\n\t" - "mulsu %D3, %A2\n\t" - "sbc %C0, %1\n\t" - "sbc %D0, %1\n\t" - "add %A0, r0\n\t" - "adc %B0, r1\n\t" - "adc %C0, %1\n\t" - "adc %D0, %1\n\t" - "mul %B2, %C3\n\t" - "add %A0, r0\n\t" - "adc %B0, r1\n\t" - "adc %C0, %1\n\t" - "adc %D0, %1\n\t" - "mul %C2, %B3\n\t" - "add %A0, r0\n\t" - "adc %B0, r1\n\t" - "adc %C0, %1\n\t" - "adc %D0, %1\n\t" - "mulsu %D2, %A3\n\t" - "sbc %C0, %1\n\t" - "sbc %D0, %1\n\t" - "add %A0, r0\n\t" - "adc %B0, r1\n\t" - "adc %C0, %1\n\t" - "adc %D0, %1\n\t" - "mulsu %D3, %B2\n\t" - "sbc %D0, %1\n\t" - "add %B0, r0\n\t" - "adc %C0, r1\n\t" - "adc %D0, %1\n\t" - "mul %C2, %C3\n\t" - "add %B0, r0\n\t" - "adc %C0, r1\n\t" - "adc %D0, %1\n\t" - "mulsu %D2, %B3\n\t" - "sbc %D0, %1\n\t" - "add %B0, r0\n\t" - "adc %C0, r1\n\t" - "adc %D0, %1\n\t" - "mulsu %D2, %C3\n\t" - "add %C0, r0\n\t" - "adc %D0, r1\n\t" - "muls %D2, %D3\n\t" - "add %D0, r0\n\t" - "clr r1\n\t" + "mulsu %D3, %C2" "\n\t" + "movw %C0, r0" "\n\t" + "mulsu %D3, %A2" "\n\t" + "sbc %C0, %1" "\n\t" + "sbc %D0, %1" "\n\t" + "add %A0, r0" "\n\t" + "adc %B0, r1" "\n\t" + "adc %C0, %1" "\n\t" + "adc %D0, %1" "\n\t" + "mul %B2, %C3" "\n\t" + "add %A0, r0" "\n\t" + "adc %B0, r1" "\n\t" + "adc %C0, %1" "\n\t" + "adc %D0, %1" "\n\t" + "mul %C2, %B3" "\n\t" + "add %A0, r0" "\n\t" + "adc %B0, r1" "\n\t" + "adc %C0, %1" "\n\t" + "adc %D0, %1" "\n\t" + "mulsu %D2, %A3" "\n\t" + "sbc %C0, %1" "\n\t" + "sbc %D0, %1" "\n\t" + "add %A0, r0" "\n\t" + "adc %B0, r1" "\n\t" + "adc %C0, %1" "\n\t" + "adc %D0, %1" "\n\t" + "mulsu %D3, %B2" "\n\t" + "sbc %D0, %1" "\n\t" + "add %B0, r0" "\n\t" + "adc %C0, r1" "\n\t" + "adc %D0, %1" "\n\t" + "mul %C2, %C3" "\n\t" + "add %B0, r0" "\n\t" + "adc %C0, r1" "\n\t" + "adc %D0, %1" "\n\t" + "mulsu %D2, %B3" "\n\t" + "sbc %D0, %1" "\n\t" + "add %B0, r0" "\n\t" + "adc %C0, r1" "\n\t" + "adc %D0, %1" "\n\t" + "mulsu %D2, %C3" "\n\t" + "add %C0, r0" "\n\t" + "adc %D0, r1" "\n\t" + "muls %D2, %D3" "\n\t" + "add %D0, r0" "\n\t" + "clr r1" "\n\t" : "=&r" (r), "=&r" (z) : "a" (ar), "a" (br) : "r0"); return r; } +/** Divide f8.24 by f8.24, result f8.24. */ +int32_t +dsp_div_f824 (int32_t dd, int32_t dv) +{ + int32_t rem; + int8_t cnt; + asm ("" + /* Store sign. */ + "mov __tmp_reg__, %D1" "\n\t" + "eor __tmp_reg__, %D3" "\n\t" + /* Change sign. */ + "sbrs %D1, 7" "\n\t" + "rjmp 1f" "\n\t" + "com %A1" "\n\t" + "com %B1" "\n\t" + "com %C1" "\n\t" + "com %D1" "\n\t" + "subi %A1, 0xff" "\n\t" + "sbci %B1, 0xff" "\n\t" + "sbci %C1, 0xff" "\n\t" + "sbci %D1, 0xff" "\n\t" + "1:" "sbrs %D3, 7" "\n\t" + "rjmp 2f" "\n\t" + "com %A3" "\n\t" + "com %B3" "\n\t" + "com %C3" "\n\t" + "com %D3" "\n\t" + "subi %A3, 0xff" "\n\t" + "sbci %B3, 0xff" "\n\t" + "sbci %C3, 0xff" "\n\t" + "sbci %D3, 0xff" "\n\t" + /* Clear rem. */ + "2:" "clr %A0" "\n\t" + "clr %B0" "\n\t" + "movw %C0, %A0" "\n\t" + /* First loop, dropped bits. */ + "ldi %2, 24" "\n\t" + "1:" //"lsl %A1" "\n\t" // shift out dd + "lsl %B1" "\n\t" // do not touch A1 + "rol %C1" "\n\t" + "rol %D1" "\n\t" + "rol %A0" "\n\t" // shift in rem + "rol %B0" "\n\t" // 24 bits only + "rol %C0" "\n\t" + //"rol %D0" "\n\t" + "sub %A0, %A3" "\n\t" // rem -= dv + "sbc %B0, %B3" "\n\t" + "sbc %C0, %C3" "\n\t" + "sbc %D0, %D3" "\n\t" + "brcc 2f" "\n\t" // if negative, restore rem + "add %A0, %A3" "\n\t" + "adc %B0, %B3" "\n\t" + "adc %C0, %C3" "\n\t" + "adc %D0, %D3" "\n\t" + "2:" "dec %2" "\n\t" // test for loop + "brne 1b" "\n\t" + /* Second loop, stored bits. */ + "ldi %2, 8" "\n\t" + "3:" "rol %A1" "\n\t" // shift out dd, shift in result + "rol %A0" "\n\t" // shift in rem + "rol %B0" "\n\t" + "rol %C0" "\n\t" + "rol %D0" "\n\t" + "sub %A0, %A3" "\n\t" // rem -= dv + "sbc %B0, %B3" "\n\t" + "sbc %C0, %C3" "\n\t" + "sbc %D0, %D3" "\n\t" + "brcc 4f" "\n\t" // if negative, restore rem + "add %A0, %A3" "\n\t" + "adc %B0, %B3" "\n\t" + "adc %C0, %C3" "\n\t" + "adc %D0, %D3" "\n\t" + "clc" "\n\t" // result bit 0 + "sbrc __zero_reg__, 0" "\n\t" + "4:" "sec" "\n\t" // result bit 1 + "dec %2" "\n\t" // test for loop + "brne 3b" "\n\t" + /* Last loop, stored bits, dd padding bits. */ + "ldi %2, 24" "\n\t" + "5:" "rol %A1" "\n\t" // shift out dd, shift in result + "rol %B1" "\n\t" // 0s come from the first loop + "rol %C1" "\n\t" + "rol %D1" "\n\t" + "rol %A0" "\n\t" // shift in rem + "rol %B0" "\n\t" + "rol %C0" "\n\t" + "rol %D0" "\n\t" + "sub %A0, %A3" "\n\t" // rem -= dv + "sbc %B0, %B3" "\n\t" + "sbc %C0, %C3" "\n\t" + "sbc %D0, %D3" "\n\t" + "brcc 6f" "\n\t" // if negative, restore rem + "add %A0, %A3" "\n\t" + "adc %B0, %B3" "\n\t" + "adc %C0, %C3" "\n\t" + "adc %D0, %D3" "\n\t" + "clc" "\n\t" // result bit 0 + "sbrc __zero_reg__, 0" "\n\t" + "6:" "sec" "\n\t" // result bit 1 + "dec %2" "\n\t" // test for loop + "brne 5b" "\n\t" + /* Store last bit. */ + "rol %A1" "\n\t" // shift in result + "rol %B1" "\n\t" + "rol %C1" "\n\t" + "rol %D1" "\n\t" + /* Restore sign. */ + "sbrs __tmp_reg__, 7" "\n\t" + "rjmp 7f" "\n\t" + "com %A1" "\n\t" + "com %B1" "\n\t" + "com %C1" "\n\t" + "com %D1" "\n\t" + "subi %A1, 0xff" "\n\t" + "sbci %B1, 0xff" "\n\t" + "sbci %C1, 0xff" "\n\t" + "sbci %D1, 0xff" "\n\t" + "7:" "" "\n\t" + : "=&r" (rem), "=d" (dd), "=&d" (cnt) : "d" (dv), "1" (dd)); + return dd; +} + /** Compute cosinus for angles between [0,pi/2]. */ int32_t dsp_cos_dli (int32_t a) diff --git a/n/asserv/src/dsp_check2.c b/n/asserv/src/dsp_check2.c index 76d5cfe..92f2be0 100644 --- a/n/asserv/src/dsp_check2.c +++ b/n/asserv/src/dsp_check2.c @@ -45,6 +45,41 @@ check_mul_f824 (int a, int b, int r) printf (" | %08x %08x %08x\n", a, b, r); } +void +check_div_f824 (int d, int v, int r) +{ + long long m; + if (v == 0) + { + printf ("zero div %d / %d", d, v); + } + else + { + m = (((long long) d) << 24) / v; + if (m > (1LL << 31) - 1 || m < -(1LL << 31)) + printf ("overflow %d / %d = %d (%Ld)", d, v, r, m); + else if (m == r) + printf ("pass %d / %d = %d", d, v, r); + else + { + printf ("fail %d / %d = %d (%Ld)", d, v, r, m); + failled = 1; + } + } + printf (" | %08x %08x %08x\n", d, v, r); +} + +int +peek (void) +{ + int ret; + char cs; + ret = scanf (" %c", &cs); + if (ret != 1) return 0; + ungetc (cs, stdin); + return cs; +} + int get (int c, int *v) { @@ -61,19 +96,36 @@ int main (void) { int incomplete = 0; - int a, b, r1, r2, r3, r4; + int a, b, r1, r2, r3, r4, c; while (!feof (stdin)) { - if (get ('A', &a) && get ('B', &b) && get ('r', &r1) && get ('r', &r2) - && get ('r', &r3) && get ('r', &r4)) + c = peek (); + if (c == 'A') + { + if (get ('A', &a) && get ('B', &b) && get ('r', &r1) && get ('r', &r2) + && get ('r', &r3) && get ('r', &r4)) + { + check_mul_f824 (a, b, r1); + check_mul_f824 (-a, b, r2); + check_mul_f824 (a, -b, r3); + check_mul_f824 (-a, -b, r4); + } + else if (!feof (stdin)) + incomplete++; + } + else if (c == 'd') { - check_mul_f824 (a, b, r1); - check_mul_f824 (-a, b, r2); - check_mul_f824 (a, -b, r3); - check_mul_f824 (-a, -b, r4); + if (get ('d', &a) && get ('v', &b) && get ('r', &r1) && get ('r', &r2) + && get ('r', &r3) && get ('r', &r4)) + { + check_div_f824 (a, b, r1); + check_div_f824 (-a, b, r1); + check_div_f824 (a, -b, r1); + check_div_f824 (-a, -b, r1); + } + else if (!feof (stdin)) + incomplete++; } - else if (!feof (stdin)) - incomplete++; } if (incomplete) printf ("%d incomplete tests\n", incomplete); diff --git a/n/asserv/src/test_dsp.c b/n/asserv/src/test_dsp.c index d8781f2..27ddc9a 100644 --- a/n/asserv/src/test_dsp.c +++ b/n/asserv/src/test_dsp.c @@ -129,6 +129,41 @@ proto_callback (uint8_t c, uint8_t argc, proto_arg_t argv[]) } } break; + case 'd' | 0 << 8: + for (k = 0; k < 2; k++) + { + for (i = 32; i > 0; i--) + { + al = wl[k] >> i; + for (j = 32; j >= 0; j--) + { + bl = wl[k] >> j; + proto_send4 ('d', + (al >> 24) & 0xff, (al >> 16) & 0xff, + (al >> 8) & 0xff, al & 0xff); + proto_send4 ('v', + (bl >> 24) & 0xff, (bl >> 16) & 0xff, + (bl >> 8) & 0xff, bl & 0xff); + rl = dsp_div_f824 (al, bl); + proto_send4 ('r', + (rl >> 24) & 0xff, (rl >> 16) & 0xff, + (rl >> 8) & 0xff, rl & 0xff); + rl = dsp_div_f824 (-al, bl); + proto_send4 ('r', + (rl >> 24) & 0xff, (rl >> 16) & 0xff, + (rl >> 8) & 0xff, rl & 0xff); + rl = dsp_div_f824 (al, -bl); + proto_send4 ('r', + (rl >> 24) & 0xff, (rl >> 16) & 0xff, + (rl >> 8) & 0xff, rl & 0xff); + rl = dsp_div_f824 (-al, -bl); + proto_send4 ('r', + (rl >> 24) & 0xff, (rl >> 16) & 0xff, + (rl >> 8) & 0xff, rl & 0xff); + } + } + } + break; case 'c' | 0 << 8: for (al = 0; al < (1L << 24) + (1L << 21); al += 32 << 8) { -- cgit v1.2.3