RFC: clc-compliant pseudo-random number generator

Ben Pfaff

One issue that comes up fairly often around here is the poor
quality of the pseudo-random number generators supplied with many
C implementations. As a result, we have to recommend things like
using the high-order bits returned by rand() instead of the
low-order bits, avoiding using rand() for anything that wants
decently random numbers, not using rand() if you want more than
approx. UINT_MAX total different sequences, and so on.

So I wrote some PRNG code that uses RC4, with a seed of up to 256
bytes. Here it is. I believe it is clc-compliant. Comments on
this and anything else are welcome.

----------------------------------------------------------------------
/*
* prng.c - Portable, ISO C90 and C99 compliant high-quality
* pseudo-random number generator based on the alleged RC4
* cipher. This PRNG should be suitable for most general-purpose
* uses. Not recommended for cryptographic or financial
* purposes. Not thread-safe.
*/

/*
* Copyright (c) 2004 Ben Pfaff <bl*@cs.stanford.edu>.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the
* following conditions are met:
*
* 1. Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* 2. Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* 3. The author's and contributors' names may not be used to
* endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS
* IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
* SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
* THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
* OF SUCH DAMAGE.
*
*/

#include "prng.h"
#include <assert.h>
#include <float.h>
#include <limits.h>
#include <math.h>
#include <time.h>

/* RC4-based pseudo-random state. */
static unsigned char s[256];
static int s_i, s_j;

/* Nonzero if PRNG has been seeded. */
static int seeded;

/* Take advantage of inlining if possible. */
#if __STDC_VERSION__ >= 199901L
#define INLINE inline
#else
#define INLINE
#endif

/* Swap bytes. */
static INLINE void
swap_byte (unsigned char *a, unsigned char *b)
{
unsigned char t = *a;
*a = *b;
*b = t;
}

/* If KEY is non-null and SIZE is nonzero, seeds the
pseudo-random number generator based on the SIZE bytes in BUF.
At most the first 256 bytes of BUF are used. If KEY is a null
pointer and SIZE is zero, seeds the pseudo-random number
generator based on the current time.

If this function is not called by the user before any prng_get
function, it is called automatically to obtain a time-based
seed. */
void
prng_seed (const void *key_, size_t size)
{
const unsigned char *key = key_;
size_t key_idx;
int i, j;

assert ((key != NULL && size > 0) || (key == NULL && size == 0));

if (key == NULL)
{
static time_t t;

t = (t == 0) ? time (NULL) : t + 1;
key = (void *) &t;
size = sizeof t;
}

for (i = 0; i < 256; i++)
s[i] = i;
for (key_idx = 0, i = j = 0; i < 256; i++)
{
j = (j + s[i] + key[key_idx]) & 255;
swap_byte (s + i, s + j);
if (++key_idx >= size)
key_idx = 0;
}

s_i = s_j = 0;
seeded = 1;
}

/* Returns a pseudo-random integer in the range [0,255]. */
unsigned char
prng_get_octet (void)
{
if (!seeded)
prng_seed (NULL, 0);

s_i = (s_i + 1) & 255;
s_j = (s_j + s[s_i]) & 255;
swap_byte (s + s_i, s + s_j);

return s[(s[s_i] + s[s_j]) & 255];
}

/* Returns a pseudo-random integer in the range [0,UCHAR_MAX]. */
unsigned char
prng_get_byte (void)
{
if (UCHAR_MAX == 255)
{
/* Normal case for 8-bit bytes. */
return prng_get_octet ();
}
else
{
/* Handle oversize bytes. */
unsigned byte = prng_get_octet ();
unsigned done = 255;
while ((unsigned char) done != UCHAR_MAX)
{
byte = (byte << 8) | prng_get_octet ();
done = (done << 8) | 255;
}
return byte;
}
}

/* Fills BUF with SIZE pseudo-random bytes. */
void
prng_get_bytes (void *buf_, size_t size)
{
unsigned char *buf;

for (buf = buf_; size-- > 0; buf++)
*buf = prng_get_byte ();
}

/* Returns a pseudo-random unsigned long in the range [0,
ULONG_MAX]. */
unsigned long
prng_get_ulong (void)
{
unsigned long ulng = prng_get_octet ();
unsigned long done = 255;

while (done != ULONG_MAX)
{
ulng = (ulng << 8) | prng_get_octet ();
done = (done << 8) | 255;
}

return ulng;
}

/* Returns a pseudo-random long in the range [0, LONG_MAX]. */
long
prng_get_long (void)
{
for (;;)
{
unsigned ulng = prng_get_ulong ();
if (ulng <= LONG_MAX)
return ulng;
}
}

/* Returns a pseudo-random unsigned int in the range [0,
UINT_MAX]. */
unsigned
prng_get_uint (void)
{
unsigned uint = prng_get_octet ();
unsigned done = 255;

while (done != UINT_MAX)
{
uint = (uint << 8) | prng_get_octet ();
done = (done << 8) | 255;
}

return uint;
}

/* Returns a pseudo-random int in the range [0, INT_MAX]. */
int
prng_get_int (void)
{
for (;;)
{
unsigned uint = prng_get_uint ();
if (uint <= INT_MAX)
return uint;
}
}

/* Returns a pseudo-random floating-point number from the uniform
distribution with range [0,1). */
double
prng_get_double (void)
{
for (;;)
{
unsigned long ulng = prng_get_ulong ();
if (ulng != ULONG_MAX)
return (double) ulng / ULONG_MAX;
}
}

/* Returns a pseudo-random floating-point number from the
distribution with mean 0 and standard deviation 1. (Multiply
the result by the desired standard deviation, then add the
desired mean.) */
double
prng_get_double_normal (void)
{
/* Knuth, _The Art of Computer Programming_, Vol. 2, 3.4.1C,
Algorithm P. */
static double next_normal = DBL_MAX;
double this_normal;

if (next_normal != DBL_MAX)
{
this_normal = next_normal;
next_normal = DBL_MAX;
}
else
{
double v1, v2, s;

do
{
double u1 = prng_get_double ();
double u2 = prng_get_double ();
v1 = 2.0 * u1 - 1.0;
v2 = 2.0 * u2 - 1.0;
s = v1 * v1 + v2 * v2;
}
while (s >= 1);

this_normal = v1 * sqrt (-2. * log (s) / s);
next_normal = v2 * sqrt (-2. * log (s) / s);
}

return this_normal;
}
----------------------------------------------------------------------
#ifndef PRNG_H_INCLUDED
#define PRNG_H_INCLUDED

#include <stddef.h>

void prng_seed (const void *, size_t);
unsigned char prng_get_octet (void);
unsigned char prng_get_byte (void);
void prng_get_bytes (void *, size_t);
unsigned long prng_get_ulong (void);
long prng_get_long (void);
unsigned prng_get_uint (void);
int prng_get_int (void);
double prng_get_double (void);
double prng_get_double_normal (void);

#endif /* prng.h */
----------------------------------------------------------------------
--
"There's only one thing that will make them stop hating you.
And that's being so good at what you do that they can't ignore you.
I told them you were the best. Now you damn well better be."
--Orson Scott Card, _Ender's Game_

Nov 14 '05

Subscribe Post Reply

6160

Eric Sosman

Old Wolf wrote:

[unattributed:]
On BCC 5.5, Ben's PRNG does not compile, whereas the rand()
provided takes 0.95 nanoseconds.
[...]
Error E2063 prng.c 244: Illegal initialization in function prng_get_double_normal
*** 1 errors in Compile ***

The error was at the line:
static double next_normal = DBL_MAX;

Does the Standard say that DBL_MAX must be a compile-time constant?

Section 5.2.4.2.2 paragraph 9:

The values given in the following list shall be
replaced by constant expressions [...]
FLT_MAX [...]
DBL_MAX [...]
LDBL_MAX [...]

It appears the compiler is broken or is being used in a
non-conforming mode.

--
Er*********@sun.com

Nov 14 '05 #51

Hallvard B Furuseth

Peter Nilsson wrote:

Hallvard B Furuseth <h.b.furuseth(nospam)@usit.uio(nospam).no> wrote in message news:<HB**************@bombur.uio.no>...
The #if is necessary because `size >>= CHAR_BIT' is invalid if size only
has CHAR_BIT value bits.

I prefer avoid such #if-s wherever reasonably possible, as just look
ugly (IMHO).

size = size >> CHAR_BIT - 1 >> 1;

I don't think obfuscating the shift is any prettier. In particular
since you now need a comment to explain why you are doing it.
But, I did note that time_t conversion suffers the same problem, and I
don't think that's quite as easy to fix, at least not if you want to
consider aberrant implementations.

I think that's OK. Initialization from time() isn't supposed to be
repeatable, so it shouldn't matter if two initializations with the
same time_t can give different results.

--
Hallvard

Nov 14 '05 #52

Eric Sosman

Ben Pfaff wrote:

[a PRNG with assorted convenience functions, one of which is]

/* Returns a pseudo-random floating-point number from the uniform
distribution with range [0,1). */
double
prng_get_double (void)
{
for (;;)
{
unsigned long ulng = prng_get_ulong ();
if (ulng != ULONG_MAX)
return (double) ulng / ULONG_MAX;
}
}

I think it would be simpler, possibly faster, and
probably just as good to replace the function body with
this one-liner:

return prng_get_ulong() / (ULONG_MAX + 1.0);

However, I'm not sure "just as good" is "best." The
code as given (and as modified) uses an unsigned long's
worth of pseudo-random bits no matter how wide or narrow
the double's mantissa might be. Perhaps it would be better
to choose the bit count based on DBL_MANT_DIG instead.
This should be easy if FLT_RADIX is a power of two; if you
want to cater to base ten or to the DeathStation 9000 it
might be trickier but I think it could be done without too
great a run-time speed penalty.

--
Er*********@sun.com

Nov 14 '05 #53

Arthur J. O'Dwyer

On Wed, 21 Jan 2004, Ben Pfaff wrote:

Da*****@cern.ch (Dan Pop) writes:
Ben Pfaff <bl*@cs.stanford.edu> writes:
Da*****@cern.ch (Dan Pop) writes:
Ben Pfaff <bl*@cs.stanford.edu> writes:

>/* Take advantage of inlining if possible. */
>#if __STDC_VERSION__ >= 199901L

Any use of __STDC_VERSION__ invokes undefined behaviour in a C90
implementation. Such an implementation can [...]
[...] define it as a macro with a value greater
than 199901L.

Only an actively malicious compiler would do so.

Wrong! See my previous post in the thread.

In which you wrote:
What if the implementor decided to include the full adoption date in
the definition of __STDC_VERSION__, so that it would become 1990mmddL?
Since there was no formal specification of the __STDC_VERSION__
macro at the time, such a decision is not a priori unreasonable.

Why would the implementor do so when C95 used 199409L, C99 used
199901L, and no other standard or (as far as I know) standard
draft defined __STDC_VERSION__ at all?

Possibly because the compiler writer in 1990 was not aware of
the precedents set by the C standards committee in 1994 and 1999?

After all, the idea to incorporate __STDC_VERSION__ must have
come from somewhere, so it's not terribly implausible that someone
might have used it in an existing compiler. Pretty unlikely, yes.
In this case, I think it would make more sense to simply leave
out the tiny 'inline' efficiency thing, or else add a real macro
along the lines of HAS_INLINE, and let the user #define it if he
wants it.

-Arthur

Nov 14 '05 #54

Peter Nilsson

Hallvard B Furuseth <h.b.furuseth(nospam)@usit.uio(nospam).no> wrote in message news:<HB**************@bombur.uio.no>...

Peter Nilsson wrote:
Hallvard B Furuseth <h.b.furuseth(nospam)@usit.uio(nospam).no> wrote in
message news:<HB**************@bombur.uio.no>...

The #if is necessary because `size >>= CHAR_BIT' is invalid if size only
has CHAR_BIT value bits.

I prefer avoid such #if-s wherever reasonably possible, as just look
ugly (IMHO).

size = size >> CHAR_BIT - 1 >> 1;

I don't think obfuscating the shift is any prettier. In particular
since you now need a comment to explain why you are doing it.

It doesn't _need_ a comment per se, but I'd say that, if anything...

#if UINT_MAX > UCHAR_MAX

....probably warrants a similar comment. The same set of people will
likely be scratching their heads as to why we bothered writing these
lines.

But, I did note that time_t conversion suffers the same problem, and I
don't think that's quite as easy to fix, at least not if you want to
consider aberrant implementations.

I think that's OK. Initialization from time() isn't supposed to be
repeatable, so it shouldn't matter if two initializations with the
same time_t can give different results.

Yup, I realised that about 15 minutes after posting. Mind you, I have
had at least one occasion to rewind the system clock to trace a
program fault.

--
Peter

Nov 14 '05 #55

Scott Fluhrer

"Ian Woods" <ne******@wuggyNOCAPS.org> wrote in message
news:Xn*****************************@217.32.252.50 ...

"Scott Fluhrer" <sf******@ix.netcom.com> wrote in
news:KS*******************@newsread1.news.pas.eart hlink.net:

"Hallvard B Furuseth" <h.b.furuseth(nospam)@usit.uio(nospam).no> wrote
in message news:HB**************@bombur.uio.no...
Scott Fluhrer wrote:
> - Since you are using RC4, if you really want to be
> cryptographically secure, you'll need to discard some part of the
> keystream at initialization
> time. Estimates as to how much you need to discard varies somewhat
> --

I've
> seen estimates of 256 - 2048 bytes.

Why is that? Are the first bytes less random than later ones?

Well, no. It's just that by looking at the first bytes, you can get
some hints at what the seed (key) was. In addition, if you have
enough initial outputs from related seeds, well, you can figure out
what the seeds are.

This is true for every PRNG since they're deterministic. The question is
how hard is it given any subsequence of output given the algorithm - you
only need to determine the seed at any single point in the sequence to

know the whole sequence. Unless you're doing some behind-the-scenes magic with
the seed, I don't see how throwing away 'n' numbers helps you. Could you
explain further?

Certainly. For one, the seed you supply RC4 is not simply the initial
state. Instead, there is a procedure (in Ben's code, the logic resides in
the function prng_seed) to convert that initial seed into an initial state.
This procedure is known to produce biases in the initial state, and these
biases produce biases in the initial output -- not very strong ones,
granted, but strong enough to be cryptographically interesting. When you
run the procedure to generate outputs (prng_get_octet in Ben's code), this
tends to smear out the biases, and thus the artifacts are limited to the
first couple initial outputs.

In addition, things get more interesting if you allow someone to view the
initial output of multiple related seeds. For example, if you give an
attacker the first octet output from the following seeds:

00 00 00 XXXXXXXX
00 00 01 XXXXXXXX
00 00 02 XXXXXXXX
etc...

then with enough of these outputs, the attacker can deduce values of the
common XXXXXXXX portion. Real protocols have been attacked because of this
observation, and this has made the cryptographical community quite wary of
the initial output of RC4.

Now, if you are just looking at creating a statistically good looking random
number generator, the biases from the first point are probably too weak to
really worry about, and the second point about related seeds is irrelevent.
However, cryptographically speaking, they are valid concerns.

(If you need references to any of the above, just ask)

--
poncho

Nov 14 '05 #56

Kevin Bracey

In message <bu**********@sunnews.cern.ch>
Da*****@cern.ch (Dan Pop) wrote:

In <05****************@tematic.com> Kevin Bracey <ke**********@tematic.com> writes:

> On a more practical note, what WOULD a reasonable value for a
> compiler in C90 mode to set __STDC_VERSION__ to, if it wanted to set
> it?

Methinks the actual question asked is off topic in this newsgroup.
If you need c.s.c, you should know where to find it.

Heh. But if I went to comp.std.c, I'd probably be told it was off-topic there
too, as the C90 standard doesn't specify it :)
BTW, wasn't I supposed to be plonked by you (due to my mental problems)?

Yeah. My newsreader's filter kept vanishing though for some reason. Didn't
think it was worth investigating why. I had set it to a one month expiry
anyway, as I assumed you were just going through a bad patch.

--
Kevin Bracey, Principal Software Engineer
Tematic Ltd Tel: +44 (0) 1223 503464
182-190 Newmarket Road Fax: +44 (0) 1223 503458
Cambridge, CB5 8HE, United Kingdom WWW: http://www.tematic.com/

Nov 14 '05 #57

Hallvard B Furuseth

Eric Sosman wrote:

Ben Pfaff wrote:
[a PRNG with assorted convenience functions, one of which is]

/* Returns a pseudo-random floating-point number from the uniform
distribution with range [0,1). */
double
prng_get_double (void)
{
for (;;)
{
unsigned long ulng = prng_get_ulong ();
if (ulng != ULONG_MAX)
return (double) ulng / ULONG_MAX;
}
}

I think it would be simpler, possibly faster, and
probably just as good to replace the function body with
this one-liner:

return prng_get_ulong() / (ULONG_MAX + 1.0);

Floating-point arithmetic is inexact. For both the original version and
yours, there is no guarantee that the result will be < 1.0. The version
I suggested earlier should work:

for (;;)
{
double ret = (double) prng_get_ulong () / ULONG_MAX;
if (ret < 1)
return ret;
}

--
Hallvard

Nov 14 '05 #58

Dan Pop

In <87************@pfaff.stanford.edu> Ben Pfaff <bl*@cs.stanford.edu> writes:

Da*****@cern.ch (Dan Pop) writes:
In <87************@pfaff.stanford.edu> Ben Pfaff <bl*@cs.stanford.edu> writes:
Da*****@cern.ch (Dan Pop) writes:

In <87************@pfaff.stanford.edu> Ben Pfaff <bl*@cs.stanford.edu> writes:

>/* Take advantage of inlining if possible. */
>#if __STDC_VERSION__ >= 199901L

Any use of __STDC_VERSION__ invokes undefined behaviour in a C90
implementation. Such an implementation can use it as a magic
identifier during preprocessing (e.g. the preprocessor's nasal demon
generator ;-) or simply define it as a macro with a value greater
than 199901L.

Only an actively malicious compiler would do so.

Wrong! See my previous post in the thread.

In which you wrote:
What if the implementor decided to include the full adoption date in
the definition of __STDC_VERSION__, so that it would become 1990mmddL?
Since there was no formal specification of the __STDC_VERSION__
macro at the time, such a decision is not a priori unreasonable.

Why would the implementor do so when C95 used 199409L, C99 used
199901L, and no other standard or (as far as I know) standard
draft defined __STDC_VERSION__ at all?

Because the implementor may have done it in 1991, years before the feature
got standardised. The __STDC_VERSION__ name is fairly intuitive for such
a feature...

BTW, you're sounding more and more like Trollsdale in this subthread.

Dan
--
Dan Pop
DESY Zeuthen, RZ group
Email: Da*****@ifh.de

Nov 14 '05 #59

Dan Pop

In <cf****************@tematic.com> Kevin Bracey <ke**********@tematic.com> writes:

In message <bu**********@sunnews.cern.ch>
Da*****@cern.ch (Dan Pop) wrote:
>> In <05****************@tematic.com> Kevin Bracey <ke**********@tematic.com> writes:
>>
> > > On a more practical note, what WOULD a reasonable value for a
> > > compiler in C90 mode to set __STDC_VERSION__ to, if it wanted to set
> > > it?

Methinks the actual question asked is off topic in this newsgroup.
If you need c.s.c, you should know where to find it.

Heh. But if I went to comp.std.c, I'd probably be told it was off-topic there
too, as the C90 standard doesn't specify it :)

You could avoid this by simply asking about the meanings of 01 in 199901L
and 09 in 199409L :-)

Actually, the latter is documented at http://www.lysator.liu.se/c/na1.html

The macro __STDC_VERSION__ shall expand to 199409L. (The Normative
Addendum was formally registered with ISO in September 1994.)

So, you could skip this stage and go and ask when was C90 formally
registered with ISO (month and year). Then, you could figure out how a
C90 implementor with particularly good foresight should have defined his
__STDC_VERSION__ ;-)

Dan
--
Dan Pop
DESY Zeuthen, RZ group
Email: Da*****@ifh.de

Nov 14 '05 #60

Old Wolf

> > Does the Standard say that DBL_MAX must be a compile-time constant?

Yes. From C99 5.2.4.2.2:

The values given in the following list shall be replaced by
constant expressions with implementation-defined values that
are greater than or equal to those shown:

- maximum representable finite floating-point number, (1 - b-p)bemax
FLT_MAX 1E+37
DBL_MAX 1E+37
LDBL_MAX 1E+37

BCC 5.x has never made any pretense at C99 compatibility.

OTOH, BCC 6 does claim C99 compatibility.
OTOH (I need 3 hands..) BCC 6 is non-free (I think the
cheapest option is to pay $10 for a version with some restrictive licencing)

Nov 14 '05 #61

Ben Pfaff

ol*****@inspire.net.nz (Old Wolf) writes:

> Does the Standard say that DBL_MAX must be a compile-time constant?

Yes. From C99 5.2.4.2.2:

The values given in the following list shall be replaced by
constant expressions with implementation-defined values that
are greater than or equal to those shown:

- maximum representable finite floating-point number, (1 - b-p)bemax
FLT_MAX 1E+37
DBL_MAX 1E+37
LDBL_MAX 1E+37

BCC 5.x has never made any pretense at C99 compatibility.

I was planning to reply that practically the same text was in
C90, but in fact my "final draft" copy here doesn't say that
these must be constant expressions. Hmm. I'll do something else
then.
--
"I don't have C&V for that handy, but I've got Dan Pop."
--E. Gibbons

Nov 14 '05 #62

CBFalconer

Old Wolf wrote:

Does the Standard say that DBL_MAX must be a compile-time constant?

Yes. From C99 5.2.4.2.2:

The values given in the following list shall be replaced by
constant expressions with implementation-defined values that
are greater than or equal to those shown:

- maximum representable finite floating-point number, (1 - b-p)bemax
FLT_MAX 1E+37
DBL_MAX 1E+37
LDBL_MAX 1E+37

BCC 5.x has never made any pretense at C99 compatibility.

The same values appear in the C89 standard. However it doesn't
appear to specify 'constant'.

--
Chuck F (cb********@yahoo.com) (cb********@worldnet.att.net)
Available for consulting/temporary embedded and systems.
<http://cbfalconer.home.att.net> USE worldnet address!

Nov 14 '05 #63

Ben Pfaff

Thanks to everyone for your comments. I've revised my code and
posted it at
http://benpfaff.org/writings/clc/random.html
Further comments are welcome. I will certainly continue to
improve my code as deficiencies are pointed out. (I am
particularly uncertain whether the get_octet() function is
implemented correctly.)

Ben
--
"...deficient support can be a virtue.
It keeps the amateurs off."
--Bjarne Stroustrup

Nov 14 '05 #64

Peter Shaggy Haywood

Groovy hepcat E. Robert Tisdale was jivin' on Mon, 19 Jan 2004
18:19:49 -0800 in comp.lang.c.
Re: RFC: clc-compliant pseudo-random number generator's a cool scene!
Dig it!

Ben Pfaff wrote:
One issue that comes up fairly often around here is the poor
quality of the pseudo-random number generators supplied with many
C implementations. As a result, we have to recommend things like
using the high-order bits returned by rand() instead of the
low-order bits, avoiding using rand() for anything that wants
decently random numbers, not using rand() if you want more than
approx. UINT_MAX total different sequences, and so on.

So I wrote some PRNG code that uses RC4, with a seed of up to 256
bytes. Here it is. I believe it is clc-compliant.
Comments on this and anything else are welcome.

[snip]

Why is your pseudo random number generator "better" than rand()?

Please read the above exerpt. Ben explains his rationale. Then read
the FAQ. This gives greater insight into what Ben said.

--

Dig the even newer still, yet more improved, sig!

http://alphalink.com.au/~phaywood/
"Ain't I'm a dog?" - Ronny Self, Ain't I'm a Dog, written by G. Sherry & W. Walker.
I know it's not "technically correct" English; but since when was rock & roll "technically correct"?

Nov 14 '05 #65

Peter Shaggy Haywood

Groovy hepcat E. Robert Tisdale was jivin' on Mon, 19 Jan 2004
23:34:46 -0800 in comp.lang.c.
Re: RFC: clc-compliant pseudo-random number generator's a cool scene!
Dig it!

Ben Pfaff wrote:
Simon Biber writes:
The error was at the line:
static double next_normal = DBL_MAX;

Bizarre. Can this be anything other than a bug in Borland C++?

Did you include float.h?

Tisdale, can't you read for yourself? He did indeed include float.h.

--

Dig the even newer still, yet more improved, sig!

http://alphalink.com.au/~phaywood/
"Ain't I'm a dog?" - Ronny Self, Ain't I'm a Dog, written by G. Sherry & W. Walker.
I know it's not "technically correct" English; but since when was rock & roll "technically correct"?

Nov 14 '05 #66

Michael Wojcik

In article <87************@pfaff.stanford.edu>, Ben Pfaff <bl*@cs.stanford.edu> writes:

ol*****@inspire.net.nz (Old Wolf) writes:
> Does the Standard say that DBL_MAX must be a compile-time constant?

Yes. From C99 5.2.4.2.2:

The values given in the following list shall be replaced by
constant expressions with implementation-defined values that
are greater than or equal to those shown:

- maximum representable finite floating-point number, (1 - b-p)bemax
FLT_MAX 1E+37
DBL_MAX 1E+37
LDBL_MAX 1E+37

BCC 5.x has never made any pretense at C99 compatibility.

I was planning to reply that practically the same text was in
C90, but in fact my "final draft" copy here doesn't say that
these must be constant expressions.

Yes, the final text of 9899-1990 says:

Of the values in the <float.h> header, FLT_RADIX shall be a
constant expression suitable for use in #if preprocessing
directives; all other values need not be constant expressions.
(C90 5.2.4.2.2)

(I think I managed to quote that bit correctly.)

So unlike C99, C90 specifically allowed FLT_MAX, DBL_MAX, etc to
be non-constant expressions.

--
Michael Wojcik mi************@microfocus.com

This is a "rubbering action game," a 2D platformer where you control a
girl equipped with an elastic rope with a fishing hook at the end.
-- review of _Umihara Kawase Shun_ for the Sony Playstation

Nov 14 '05 #67

Hallvard B Furuseth

Ben Pfaff wrote:

http://benpfaff.org/writings/clc/random.html get_octet (const void *bytes_, size_t n_bytes, size_t octet_idx)
(...)
return bytes[octet_idx];
return bytes[octet_idx % n_bytes];
prng_get_ulong (void)
(...)
ulng = (ulng << CHAR_BIT) | prng_get_octet ();
ulng = (ulng << 8) | prng_get_octet ();

Same in prng_get_uint().
prng_get_double (void)
{
for (;;)
{
double dbl = (double) prng_get_ulong () / ULONG_MAX;
if (dbl >= 0.0 && dbl < 1.0)
return dbl;
Heh. You are a lot more paranoid than me. While I can imagine dbl
could be >= 1, I cannot imagine it getting the wrong sign. OTOH,
the standard doesn't seem to forbid it...

BTW, the statement

double dbl = prng_get_ulong () / (ULONG_MAX + 1.0);

which someone suggested is slightly better even though you still need to
test that the result is less than 1.0. It does eliminate a loop once
every ULONG_MAX calls.
prng_get_double_normal (void)
Just to be _quite_ safe, you could insert

static double limit; /* guard against overflow in -2.*log(s)/s */
if (limit == 0)
limit = log(DBL_MAX/2) / (DBL_MAX/2);

before the loop, and end it with `while (s >= 1 || s < limit);'.
Otherwise the function will crash if prng_get_double() returns 0.5
or something very close to it twice in a row.
(I am particularly uncertain whether the get_octet() function is
implemented correctly.)

I #defined char as short and CHAR_BIT as 13 and inserted some asserts
and test output. Seems to be correct except for the bug above.

--
Hallvard

Nov 14 '05 #68

Ben Pfaff

Thanks for the tips. I incorporated all of them.

Hallvard B Furuseth <h.b.furuseth(nospam)@usit.uio(nospam).no> writes:

prng_get_double_normal (void)

Just to be _quite_ safe, you could insert

static double limit; /* guard against overflow in -2.*log(s)/s */
if (limit == 0)
limit = log(DBL_MAX/2) / (DBL_MAX/2);

before the loop, and end it with `while (s >= 1 || s < limit);'.
Otherwise the function will crash if prng_get_double() returns 0.5
or something very close to it twice in a row.

This one is particularly clear-sighted.
--
"What is appropriate for the master is not appropriate for the novice.
You must understand the Tao before transcending structure."
--The Tao of Programming

Nov 14 '05 #69

Peter Nilsson

"Ben Pfaff" <bl*@cs.stanford.edu> wrote in message
news:87************@pfaff.stanford.edu...

Thanks to everyone for your comments. I've revised my code and
posted it at
http://benpfaff.org/writings/clc/random.html
Further comments are welcome. I will certainly continue to
improve my code as deficiencies are pointed out. (I am
particularly uncertain whether the get_octet() function is
implemented correctly.)

From the link...

c |= ((unsigned) bytes[second_byte] & bits_left_mask) << bits_filled;

The (unsigned) cast is redundant in light of the mask. However, leaving in
the cast, you can remove the second mask by delaying the 0xFF mask of the
original octet component.

My first (untested) attempt was... [complete with student commentary ;-]

static unsigned char
get_octet (const void *buf, size_t nbytes, size_t ioct)
{
unsigned char octet; /* result octet */
const unsigned char *bp = buf; /* byte pointer */

if (CHAR_BIT == 8)
{
octet = bp[ioct % nbytes];
}
else
{
size_t ibit = ioct * 8u; /* raw bit index */
size_t ibyte = ibit / CHAR_BIT; /* raw byte index */

ibit %= CHAR_BIT; /* index byte's bit index */
ibyte %= nbytes; /* byte index (normalised) */

octet = bp[ibyte] >> ibit; /* [partial] octet */

/*
// Does the octet spans two bytes?
// This can't happen if CHAR_BIT is a multiple of 8.
// (I doubt some compilers would optimise out the
// redundant if without an explicit test)
*/
if (CHAR_BIT % 8 != 0 && ibit > CHAR_BIT - 8)
{
ibyte = (ibyte == nbytes) ? 0 : ibyte + 1; /* next byte */
/* complete octet */
octet |= ((unsigned) bp[ibyte]) << (CHAR_BIT - ibit);
}

octet &= 0xFF; /* mask 8 bits only */
}

return octet;
}

--
Peter

Nov 14 '05 #70

Dan Pop

In <87************@pfaff.stanford.edu> Ben Pfaff <bl*@cs.stanford.edu> writes:

ol*****@inspire.net.nz (Old Wolf) writes:
> Does the Standard say that DBL_MAX must be a compile-time constant?

Yes. From C99 5.2.4.2.2:

The values given in the following list shall be replaced by
constant expressions with implementation-defined values that
are greater than or equal to those shown:

- maximum representable finite floating-point number, (1 - b-p)bemax
FLT_MAX 1E+37
DBL_MAX 1E+37
LDBL_MAX 1E+37

BCC 5.x has never made any pretense at C99 compatibility.

I was planning to reply that practically the same text was in
C90, but in fact my "final draft" copy here doesn't say that
these must be constant expressions.

It *explicitly* allows them not to:

Of the values in the <float.h> header, FLT_RADIX shall be a
constant expression suitable for use in #if preprocessing directives;
all other values need not be constant expressions.
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Dan
--
Dan Pop
DESY Zeuthen, RZ group
Email: Da*****@ifh.de

Nov 14 '05 #71

Similar topics