% Conference paper (FCCM 2007) by Goldberg, Even and Seidel on FPGA double-precision division.
% Acronyms in title/booktitle/series are brace-protected so recasing styles keep them upper-case.
@inproceedings{05f435d98fec4830888e5db64a246191,
  author    = {Goldberg, Ronen and Even, Guy and Seidel, Peter-M.},
  title     = {An {FPGA} implementation of pipelined multiplicative division with {IEEE} Rounding},
  booktitle = {15th Annual {IEEE} Symposium on Field-Programmable Custom Computing Machines ({FCCM} 2007)},
  series    = {Proceedings 2007 {IEEE} Symposium on Field-Programmable Custom Computing Machines, {FCCM} 2007},
  publisher = {IEEE},
  year      = {2007},
  pages     = {185--196},
  doi       = {10.1109/FCCM.2007.59},
  isbn      = {0-7695-2940-2},
  language  = {English},
  note      = {Conference date: 23-04-2007 through 25-04-2007},
  abstract  = {We report the results of an FPGA implementation of double precision floating-point division with IEEE rounding. We achieve a total latency (i.e., cycles times clock period) that is 2.6 times smaller than the latency of the fastest previous implementation on FPGAs. The amount of hardware, on the other hand, is comparable to commercial cores. The division circuit is based on Goldschmidt's algorithm. All IEEE rounding modes are supported and are implemented using dewpoint rounding. The precision of the initial approximation of the reciprocal is 14 bits. To save hardware and reduce the critical path, a half-sized 62x30 Booth radix-8 multiplier is used. This multiplier can receive both the multiplicand and the multiplier in carry-save representation. The division circuit is partitioned into four pipeline stages, has a latency of 11 cycles, and may restart a new double precision division operation after 8 cycles. Synthesis results of an implementation (not including the computation of the initial approximation of the reciprocal and the exponent path) guarantee a clock frequency of 131 MHz on an Altera Stratix II using 3592 ALMs. The implementation was successfully tested with over 10 million random vectors as well as over a million hard-to-round vectors.},
}