forked from ZipCPU/zipcpu
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdiv.v
354 lines (336 loc) · 12.3 KB
/
div.v
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
////////////////////////////////////////////////////////////////////////////////
//
// Filename: div.v
//
// Project: Zip CPU -- a small, lightweight, RISC CPU soft core
//
// Purpose: Provide an Integer divide capability to the Zip CPU. Provides
// for both signed and unsigned divide.
//
// Steps:
// i_rst The DIVide unit starts in idle. It can also be placed into an
// idle by asserting the reset input.
//
// i_wr When i_rst is asserted, a divide begins. On the next clock:
//
// o_busy is set high so everyone else knows we are at work and they can
// wait for us to complete.
//
// pre_sign is set to true if we need to do a signed divide. In this
// case, we take a clock cycle to turn the divide into an unsigned
// divide.
//
// o_quotient, a place to store our result, is initialized to all zeros.
//
// r_dividend is set to the numerator
//
// r_divisor is set to 2^31 * the denominator (shift left by 31, or add
// 31 zeros to the right of the number.
//
// pre_sign When true (clock cycle after i_wr), a clock cycle is used
// to take the absolute value of the various arguments (r_dividend
// and r_divisor), and to calculate what sign the output result
// should be.
//
//
// At this point, the divide is has started. The divide works by walking
// through every shift of the
//
// DIVIDEND over the
// DIVISOR
//
// If the DIVISOR is bigger than the dividend, the divisor is shifted
// right, and nothing is done to the output quotient.
//
// DIVIDEND
// DIVISOR
//
// This repeats, until DIVISOR is less than or equal to the divident, as in
//
// DIVIDEND
// DIVISOR
//
// At this point, if the DIVISOR is less than the dividend, the
// divisor is subtracted from the dividend, and the DIVISOR is again
// shifted to the right. Further, a '1' bit gets set in the output
// quotient.
//
// Once we've done this for 32 clocks, we've accumulated our answer into
// the output quotient, and we can proceed to the next step. If the
// result will be signed, the next step negates the quotient, otherwise
// it returns the result.
//
// On the clock when we are done, o_busy is set to false, and o_valid set
// to true. (It is a violation of the ZipCPU internal protocol for both
// busy and valid to ever be true on the same clock. It is also a
// violation for busy to be false with valid true thereafter.)
//
//
// Creator: Dan Gisselquist, Ph.D.
// Gisselquist Technology, LLC
//
////////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2015-2017, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of the GNU General Public License as published
// by the Free Software Foundation, either version 3 of the License, or (at
// your option) any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with this program. (It's in the $(ROOT)/doc directory. Run make with no
// target there if the PDF file isn't present.) If not, see
// <http://www.gnu.org/licenses/> for a copy.
//
// License: GPL, v3, as defined and found on www.gnu.org,
// http://www.gnu.org/licenses/gpl.html
//
//
////////////////////////////////////////////////////////////////////////////////
//
//
`default_nettype none
//
// `include "cpudefs.v"
//
module div(i_clk, i_rst, i_wr, i_signed, i_numerator, i_denominator,
o_busy, o_valid, o_err, o_quotient, o_flags);
parameter BW=32, LGBW = 5;
input wire i_clk, i_rst;
// Input parameters
input wire i_wr, i_signed;
input wire [(BW-1):0] i_numerator, i_denominator;
// Output parameters
output reg o_busy, o_valid, o_err;
output reg [(BW-1):0] o_quotient;
output wire [3:0] o_flags;
// r_busy is an internal busy register. It will clear one clock
// before we are valid, so it can't be o_busy ...
//
reg r_busy;
reg [(2*BW-2):0] r_divisor;
reg [(BW-1):0] r_dividend;
wire [(BW):0] diff; // , xdiff[(BW-1):0];
assign diff = r_dividend - r_divisor[(BW-1):0];
// assign xdiff= r_dividend - { 1'b0, r_divisor[(BW-1):1] };
reg r_sign, pre_sign, r_z, r_c, last_bit;
reg [(LGBW-1):0] r_bit;
reg zero_divisor;
// The Divide logic begins with r_busy. We use r_busy to determine
// whether or not the divide is in progress, vs being complete.
// Here, we clear r_busy on any reset and set it on i_wr (the request
// do to a divide). The divide ends when we are on the last bit,
// or equivalently when we discover we are dividing by zero.
initial r_busy = 1'b0;
always @(posedge i_clk)
if (i_rst)
r_busy <= 1'b0;
else if (i_wr)
r_busy <= 1'b1;
else if ((last_bit)||(zero_divisor))
r_busy <= 1'b0;
// o_busy is very similar to r_busy, save for some key differences.
// Primary among them is that o_busy needs to (possibly) be true
// for an extra clock after r_busy clears. This would be that extra
// clock where we negate the result (assuming a signed divide, and that
// the result is supposed to be negative.) Otherwise, the two are
// identical.
initial o_busy = 1'b0;
always @(posedge i_clk)
if (i_rst)
o_busy <= 1'b0;
else if (i_wr)
o_busy <= 1'b1;
else if (((last_bit)&&(~r_sign))||(zero_divisor))
o_busy <= 1'b0;
else if (~r_busy)
o_busy <= 1'b0;
// If we are asked to divide by zero, we need to halt. The sooner
// we halt and report the error, the better. Hence, here we look
// for a zero divisor while being busy. The always above us will then
// look at this and halt a divide in the middle if we are trying to
// divide by zero.
//
// Note that this works off of the 2BW-1 length vector. If we can
// simplify that, it should simplify our logic as well.
initial zero_divisor = 1'b0;
always @(posedge i_clk)
// zero_divisor <= (r_divisor == 0)&&(r_busy);
if (i_rst)
zero_divisor <= 1'b0;
else if (i_wr)
zero_divisor <= (i_denominator == 0);
else if (!r_busy)
zero_divisor <= 1'b0;
// o_valid is part of the ZipCPU protocol. It will be set to true
// anytime our answer is valid and may be used by the calling module.
// Indeed, the ZipCPU will halt (and ignore us) once the i_wr has been
// set until o_valid gets set.
//
// Here, we clear o_valid on a reset, and any time we are on the last
// bit while busy (provided the sign is zero, or we are dividing by
// zero). Since o_valid is self-clearing, we don't need to clear
// it on an i_wr signal.
initial o_valid = 1'b0;
always @(posedge i_clk)
if (i_rst)
o_valid <= 1'b0;
else if (r_busy)
begin
if ((last_bit)||(zero_divisor))
o_valid <= (zero_divisor)||(!r_sign);
end else if (r_sign)
begin
o_valid <= (!zero_divisor); // 1'b1;
end else
o_valid <= 1'b0;
// Division by zero error reporting. Anytime we detect a zero divisor,
// we set our output error, and then hold it until we are valid and
// everything clears.
initial o_err = 1'b0;
always @(posedge i_clk)
if((i_rst)||(o_valid))
o_err <= 1'b0;
else if (((r_busy)||(r_sign))&&(zero_divisor))
o_err <= 1'b1;
else
o_err <= 1'b0;
// r_bit
//
// Keep track of which "bit" of our divide we are on. This number
// ranges from 31 down to zero. On any write, we set ourselves to
// 5'h1f. Otherwise, while we are busy (but not within the pre-sign
// adjustment stage), we subtract one from our value on every clock.
always @(posedge i_clk)
if ((r_busy)&&(!pre_sign))
r_bit <= r_bit + {(LGBW){1'b1}};
else
r_bit <= {(LGBW){1'b1}};
// last_bit
//
// This logic replaces a lot of logic that was inside our giant state
// machine with ... something simpler. In particular, we'll use this
// logic to determine we are processing our last bit. The only trick
// is, this bit needs to be set whenever (r_busy) and (r_bit == 0),
// hence we need to set on (r_busy) and (r_bit == 1) so as to be set
// when (r_bit == 0).
initial last_bit = 1'b0;
always @(posedge i_clk)
if (r_busy)
last_bit <= (r_bit == {{(LGBW-1){1'b0}},1'b1});
else
last_bit <= 1'b0;
// pre_sign
//
// This is part of the state machine. pre_sign indicates that we need
// a extra clock to take the absolute value of our inputs. It need only
// be true for the one clock, and then it must clear itself.
initial pre_sign = 1'b0;
always @(posedge i_clk)
if (i_wr)
pre_sign <= i_signed;
else
pre_sign <= 1'b0;
// As a result of our operation, we need to set the flags. The most
// difficult of these is the "Z" flag indicating that the result is
// zero. Here, we'll use the same logic that sets the low-order
// bit to clear our zero flag, and leave the zero flag set in all
// other cases. Well ... not quite. If we need to flip the sign of
// our value, then we can't quite clear the zero flag ... yet.
always @(posedge i_clk)
if((r_busy)&&(r_divisor[(2*BW-2):(BW)] == 0)&&(!diff[BW]))
// If we are busy, the upper bits of our divisor are
// zero (i.e., we got the shift right), and the top
// (carry) bit of the difference is zero (no overflow),
// then we could subtract our divisor from our dividend
// and hence we add a '1' to the quotient, while setting
// the zero flag to false.
r_z <= 1'b0;
else if ((!r_busy)&&(!r_sign))
r_z <= 1'b1;
// r_dividend
// This is initially the numerator. On a signed divide, it then becomes
// the absolute value of the numerator. We'll subtract from this value
// the divisor shifted as appropriate for every output bit we are
// looking for--just as with traditional long division.
always @(posedge i_clk)
if (pre_sign)
begin
// If we are doing a signed divide, then take the
// absolute value of the dividend
if (r_dividend[BW-1])
r_dividend <= -r_dividend;
// The begin/end block is important so we don't lose
// the fact that on an else we don't do anything.
end else if((r_busy)&&(r_divisor[(2*BW-2):(BW)]==0)&&(!diff[BW]))
// This is the condition whereby we set a '1' in our
// output quotient, and we subtract the (current)
// divisor from our dividend. (The difference is
// already kept in the diff vector above.)
r_dividend <= diff[(BW-1):0];
else if (!r_busy)
// Once we are done, and r_busy is no longer high, we'll
// always accept new values into our dividend. This
// guarantees that, when i_wr is set, the new value
// is already set as desired.
r_dividend <= i_numerator;
initial r_divisor = 0;
always @(posedge i_clk)
if (pre_sign)
begin
if (r_divisor[(2*BW-2)])
r_divisor[(2*BW-2):(BW-1)]
<= -r_divisor[(2*BW-2):(BW-1)];
end else if (r_busy)
r_divisor <= { 1'b0, r_divisor[(2*BW-2):1] };
else
r_divisor <= { i_denominator, {(BW-1){1'b0}} };
// r_sign
// is a flag for our state machine control(s). r_sign will be set to
// true any time we are doing a signed divide and the result must be
// negative. In that case, we take a final logic stage at the end of
// the divide to negate the output. This flag is what tells us we need
// to do that. r_busy will be true during the divide, then when r_busy
// goes low, r_sign will be checked, then the idle/reset stage will have
// been reached. For this reason, we cannot set r_sign unless we are
// up to something.
initial r_sign = 1'b0;
always @(posedge i_clk)
if (pre_sign)
r_sign <= ((r_divisor[(2*BW-2)])^(r_dividend[(BW-1)]));
else if (r_busy)
r_sign <= (r_sign)&&(!zero_divisor);
else
r_sign <= 1'b0;
always @(posedge i_clk)
if (r_busy)
begin
o_quotient <= { o_quotient[(BW-2):0], 1'b0 };
if ((r_divisor[(2*BW-2):(BW)] == 0)&&(!diff[BW]))
begin
o_quotient[0] <= 1'b1;
end
end else if (r_sign)
o_quotient <= -o_quotient;
else
o_quotient <= 0;
// Set Carry on an exact divide
// Perhaps nothing uses this, but ... well, I suppose we could remove
// this logic eventually, just ... not yet.
always @(posedge i_clk)
r_c <= (r_busy)&&((diff == 0)||(r_dividend == 0));
// The last flag: Negative. This flag is set assuming that the result
// of the divide was negative (i.e., the high order bit is set). This
// will also be true of an unsigned divide--if the high order bit is
// ever set upon completion. Indeed, you might argue that there's no
// logic involved.
wire w_n;
assign w_n = o_quotient[(BW-1)];
assign o_flags = { 1'b0, w_n, r_c, r_z };
endmodule