; path: root/digital/avr/modules/math/fixed/fixed_mul_f824.avr.S
; blob: 9da807d7db03c2d3720938150ae57d13ced7b552
; fixed_mul_f824.avr.S
; avr.math.fixed - Fixed point math module. {{{
;
; Copyright (C) 2005 Nicolas Schodet
;
; Robot APB Team/Efrei 2006.
;        Web: http://assos.efrei.fr/robot/
;      Email: robot AT efrei DOT fr
;
; This program is free software; you can redistribute it and/or modify
; it under the terms of the GNU General Public License as published by
; the Free Software Foundation; either version 2 of the License, or
; (at your option) any later version.
; 
; This program is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
; GNU General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with this program; if not, write to the Free Software
; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
;
; }}}

; a: A3.A2 A1 A0
; b: B3.B2 B1 B0
; m: M3.M2 M1 M0
; 
;      . => fractional dot
; A3xB3.        |
;   |A3xB2      |
;   |  .A3xB1   |
;   |  .   A3xB0|
;   |A2xB3      |
;   |  .A2xB2   |
;   |  .   A2xB1|
;   |  .      A2xB0
;   |  .A1xB3   |
;   |  .   A1xB2|
;   |  .      A1xB1
;   |  .        |A1xB0
;   |  .   A0xB3|
;   |  .      A0xB2
;   |  .        |A0xB1
;   |  .        |   A0xB0
;      .  [- step 1 --]
;   [- step 2 --]
;
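; The operation is done in two steps. The three least significant
; multiplications (A0xB0, A0xB1 and A1xB0) cannot be skipped: they lie below
; the result, but their carries propagate into it.
; All partial products are added together. When a signed partial product is
; negative, it is sign-extended into the higher result bytes (the sbc
; instructions).
; The result is rounded to the nearest value.
;
; Performance:
;  96 cycles
;  10416 multiplications per second, per MHz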

; Register aliases used by fixed_mul_f824_asm.

; Aliases for r25:r24; not referenced by fixed_mul_f824_asm.
#define dummy1 r25
#define dummy0 r24

; First operand, a3 is the most significant byte.
; mulsu only accepts registers r16-r23, so both operands are kept in that
; range.
#define a3 r23
#define a2 r22
#define a1 r21
#define a0 r20

; Second operand, b3 is the most significant byte.
#define b3 r19
#define b2 r18
#define b1 r17
#define b0 r16

; Accumulator for the result, m3 is the most significant byte.
; The low word is built in r27:r26 because r23 and r22 still hold a3 and a2
; during the computation; it is copied to r23:r22 just before return.
#define m3 r25
#define m2 r24
#define m1 r27
#define m0 r26

; Final location of the low result word (same registers as a3:a2).
#define m1r r23
#define m0r r22

; Register kept at zero, used with adc/sbc to propagate carries and borrows.
#define z r30

	.text
; fixed_mul_f824_asm - multiply two signed 8.24 fixed point numbers.
;
; In:      r23:r22:r21:r20 = a, r19:r18:r17:r16 = b.
;          Only the top byte of each operand is treated as signed.
; Out:     r25:r24:r23:r22 = a * b with the 24 lowest product bits dropped,
;          after adding half a result lsb for rounding.
; Clobber: r0, r1 (left cleared), r26, r27, r30 and the SREG flags.
;          r16-r21 are only read.
;
; With P7..P0 the bytes of the full 64 bit product, the result is P6:P5:P4:P3.
; P7 is never computed, so there is no overflow detection or saturation.
;
; NOTE(review): how the caller loads a and b into these registers is not
; visible in this file; confirm against the calling code.
	.global fixed_mul_f824_asm
	.func fixed_mul_f824_asm
fixed_mul_f824_asm:
; The zero register stays cleared for the whole routine; adc/sbc with it only
; propagate the carry or borrow.
	clr z
; Step 1: low dword (>> 8, with 8 guards).
; The unsigned partial products of byte offset 0 to 2 are summed in the
; accumulator, whose four bytes stand for P4:P3:P2:P1. P0 is never stored.
; A0xB1 (offset 1) initialises the accumulator.
	mul a0, b1
	 movw m0, r0
	 clr m2
	 clr m3
; A0xB0 (offset 0): only its high byte reaches P1.
	mul a0, b0
	 add m0, r1
	 adc m1, z
	 adc m2, z
; A1xB0 (offset 1). The sum is still too small to carry out of the P3 byte.
	mul a1, b0
	 add m0, r0
	 adc m1, r1
	 adc m2, z
; A0xB2 (offset 2): added from the P2 byte up, carry goes into the P4 byte.
	mul a0, b2
	 add m1, r0
	 adc m2, r1
	 adc m3, z
; A1xB1 (offset 2).
	mul a1, b1
	 add m1, r0
	 adc m2, r1
	 adc m3, z
; A2xB0 (offset 2).
	mul a2, b0
	 add m1, r0
	 adc m2, r1
	 adc m3, z
; Rounding, the P1 byte is dropped.
; Its register is reused to hold 0x80, half of one unit of the lowest result
; byte P3. This is added to P2 and the carry is propagated into P3 and P4.
	ldi m0, 0x80
	add m1, m0
	adc m2, z
	adc m3, z
; Shift: P3 and P4 move down to the low accumulator word, which frees the high
; word for P5 and P6.
	movw m0, m2
; Step 2: upper word. From here the accumulator stands for P6:P5:P4:P3.
; Products involving a top operand byte use mulsu/muls with the signed byte
; first. mulsu leaves the sign of its product in the carry flag; the sbc
; instructions right after it subtract that carry from the bytes above the
; product, which performs the sign extension.
; B3xA2 (offset 5) initialises the high word. Its sign extension would fall
; in P7, which is not computed.
	mulsu b3, a2
	 movw m2, r0
; B3xA0 (offset 3), sign-extended into P5 and P6.
	mulsu b3, a0
	 sbc m2, z
	 sbc m3, z
	 add m0, r0
	 adc m1, r1
	 adc m2, z
	 adc m3, z
; A1xB2 (offset 3), unsigned.
	mul a1, b2
	 add m0, r0
	 adc m1, r1
	 adc m2, z
	 adc m3, z
; A2xB1 (offset 3), unsigned.
	mul a2, b1
	 add m0, r0
	 adc m1, r1
	 adc m2, z
	 adc m3, z
; A3xB0 (offset 3), sign-extended into P5 and P6.
	mulsu a3, b0
	 sbc m2, z
	 sbc m3, z
	 add m0, r0
	 adc m1, r1
	 adc m2, z
	 adc m3, z
; B3xA1 (offset 4), sign-extended into P6.
	mulsu b3, a1
	 sbc m3, z
	 add m1, r0
	 adc m2, r1
	 adc m3, z
; A2xB2 (offset 4), unsigned.
	mul a2, b2
	 add m1, r0
	 adc m2, r1
	 adc m3, z
; A3xB1 (offset 4), sign-extended into P6.
	mulsu a3, b1
	 sbc m3, z
	 add m1, r0
	 adc m2, r1
	 adc m3, z
; A3xB2 (offset 5): fills P5 and P6, nothing above is kept.
	mulsu a3, b2
	 add m2, r0
	 adc m3, r1
; A3xB3 (offset 6): only the low byte of the product fits in the result.
	muls a3, b3
	 add m3, r0
; Restore r1 and return.
; r1 was overwritten by the multiplications and is set back to zero. The low
; result word is copied from r27:r26 to r23:r22 now that the first operand is
; no longer read.
	clr r1
	movw m0r, m0
	ret
	.endfunc