commit 56235681859622e092d94da9ea9c6394be1653b0 Author: ddidderr Date: Mon Feb 12 09:23:56 2024 +0100 feat: initial implementation of ez-urandom A small, dependency-light crate that wraps `/dev/urandom` and exposes ergonomic helpers for the things people actually reach for randomness to do: read a primitive integer, sample a number in a half-open range, or generate a random ASCII string from a given alphabet. Why this crate exists --------------------- The Rust ecosystem already has `rand` and `getrandom`, but both bring in more surface area than is warranted when all you need is "give me some bytes from the OS." This crate intentionally stays tiny: a single `File` handle to `/dev/urandom`, a handful of typed readers, and one rejection-sampling helper. No traits to learn, no generic plumbing, no algorithmic CSPRNGs in-process — the kernel does that job already. What's included --------------- - `OsRandom`: thin wrapper around an open `/dev/urandom` handle, constructed via `try_new()`. Implements `io::Read` so it can plug into anywhere a byte source is expected. - Typed integer readers (`get_u8` ... `get_i128`, plus `usize`/`isize`) generated by a small `paste!` macro. Each reads exactly `size_of::<T>()` bytes and interprets them in native-endian order so every bit pattern is equally likely. - `gen_range_u32(n)`: uniform integer in `0..n` via rejection sampling on top of `get_u32`. The cutoff is computed as the largest multiple of `n` that fits in `2^32`, so there is *no* modulo bias — values at or above the cutoff are discarded and resampled. This is the textbook fix for the "`rand() % n` is biased when `n` doesn't divide `RAND_MAX+1`" footgun. - `pick(set)` and `string_from(set, len)`: uniform byte / ASCII string drawn from a caller-supplied alphabet, layered on `gen_range_u32`. - `charset` module with `const`-built alphabets: `DIGITS`, `LOWERCASE`, `UPPERCASE`, `ALPHABETIC`, `ALPHANUMERIC`, `HEX_LOWER`, `HEX_UPPER`. 
Built with a small `const fn concat` so the arrays exist as compile-time constants with no runtime allocation. Design choices and tradeoffs ---------------------------- - Linux/Unix only by construction: opens `/dev/urandom` directly. This is a deliberate scope limit — supporting Windows would mean pulling in `BCryptGenRandom` and an abstraction layer, which defeats the point of the crate. Users who need cross-platform should reach for `getrandom`. - `unsafe_code = "forbid"` at the crate level. The implementation does not need `unsafe`, and forbidding it makes that contract explicit and machine-checked. - Clippy `pedantic` is on as `warn`, plus `unwrap_used = "warn"` and `todo = "warn"`, so the code is held to a tighter standard than the defaults from day one. - `panic = "unwind"` in release: the crate's `assert!`s (e.g. "set must not be empty") should be recoverable by callers that wrap them, not abort the process. - Native-endian integer decoding: the bits are uniformly random, so endianness is irrelevant to the distribution. Choosing native-endian avoids a needless byte swap on every read. - Rejection sampling uses a 32-bit cutoff regardless of the requested range. A 64-bit cutoff would reduce the rejection probability for large ranges (the worst case is `n` just above `2^31`, not `n` close to `u32::MAX`), but in practice the worst-case rejection rate is < 50% and the simpler code wins. Known limitations ----------------- - No async API; reads are blocking. `/dev/urandom` does not block in practice on Linux post-init, so this is fine for typical use. - `gen_range` is only provided for `u32`. Wider ranges would need a 64-bit variant; not added until a use case appears. - File handle is held for the lifetime of `OsRandom`. Callers that want a fresh fd per call should construct a new instance. Test Plan --------- - `cargo build` and `cargo clippy --all-targets` are clean under the pedantic lint set configured in `Cargo.toml`. 
- Manual smoke test by reading several integers and generating alphanumeric / hex / digit strings; values are well-distributed and no obvious bias is visible. diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..a6cdf28 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,16 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "ez-urandom" +version = "0.1.0" +dependencies = [ + "paste", +] + +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..afe9084 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "ez-urandom" +version = "0.1.0" +edition = "2024" + +[dependencies] +paste = "1" + +[lints.clippy] +pedantic = { level = "warn", priority = -1 } +todo = "warn" +unwrap_used = "warn" + +[lints.rust] +unsafe_code = "forbid" + +[profile.release] +debug = false +strip = true +lto = true +panic = "unwind" +codegen-units = 1 diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000..e270811 --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1,3 @@ +group_imports = "StdExternalCrate" +imports_granularity = "Crate" +imports_layout = "HorizontalVertical" diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..587ddf3 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,181 @@ +//! Easy access to operating-system randomness via `/dev/urandom`. +//! +//! The crate exposes [`OsRandom`], a small wrapper around an open handle to +//! `/dev/urandom` with helpers for reading primitive integers, sampling +//! uniformly without modulo bias, and generating random ASCII strings from a +//! caller-supplied alphabet. 
Common alphabets live in [`charset`]. + +use std::{ + fs::File, + io::{self, Read}, + mem::size_of, +}; + +use ::paste::paste; + +const DEV_URANDOM: &str = "/dev/urandom"; + +/// Common character sets for use with [`OsRandom::pick`] and +/// [`OsRandom::string_from`]. +pub mod charset { + const fn concat<const N: usize>(parts: &[&[u8]]) -> [u8; N] { + let mut out = [0u8; N]; + let mut i = 0; + let mut p = 0; + while p < parts.len() { + let part = parts[p]; + let mut j = 0; + while j < part.len() { + out[i] = part[j]; + i += 1; + j += 1; + } + p += 1; + } + out + } + + /// ASCII decimal digits `0`–`9`. + pub const DIGITS: &[u8] = b"0123456789"; + /// ASCII lowercase letters `a`–`z`. + pub const LOWERCASE: &[u8] = b"abcdefghijklmnopqrstuvwxyz"; + + const UPPERCASE_ARR: [u8; 26] = { + let mut out = [0u8; 26]; + let mut i = 0; + while i < 26 { + out[i] = LOWERCASE[i].to_ascii_uppercase(); + i += 1; + } + out + }; + /// ASCII uppercase letters `A`–`Z`. + pub const UPPERCASE: &[u8] = &UPPERCASE_ARR; + + const ALPHABETIC_ARR: [u8; 52] = concat(&[LOWERCASE, UPPERCASE]); + /// ASCII letters: [`LOWERCASE`] followed by [`UPPERCASE`]. + pub const ALPHABETIC: &[u8] = &ALPHABETIC_ARR; + + const ALPHANUMERIC_ARR: [u8; 62] = concat(&[LOWERCASE, UPPERCASE, DIGITS]); + /// ASCII letters and digits: [`LOWERCASE`], [`UPPERCASE`], then [`DIGITS`]. + pub const ALPHANUMERIC: &[u8] = &ALPHANUMERIC_ARR; + + const HEX_LOWER_ARR: [u8; 16] = concat(&[DIGITS, LOWERCASE.split_at(6).0]); + /// Lowercase hexadecimal digits `0`–`9`, `a`–`f`. + pub const HEX_LOWER: &[u8] = &HEX_LOWER_ARR; + + const HEX_UPPER_ARR: [u8; 16] = concat(&[DIGITS, UPPERCASE.split_at(6).0]); + /// Uppercase hexadecimal digits `0`–`9`, `A`–`F`. + pub const HEX_UPPER: &[u8] = &HEX_UPPER_ARR; +} + +/// Handle to the OS randomness source (`/dev/urandom`). +/// +/// Construct with [`OsRandom::try_new`], then call the typed `get_*` readers, +/// the uniform-sampling helpers, or use the [`Read`] impl to fill an arbitrary +/// buffer. 
+pub struct OsRandom { + devurandom: File, +} + +macro_rules! os_random_get_integer_impls { + ($($t:ty),*) => { + $( + paste! { + /// # Errors + /// Returns any I/O error produced while reading from `/dev/urandom`. + pub fn [<get_ $t>](&mut self) -> std::io::Result<$t> { + let mut buf = [0u8; size_of::<$t>()]; + self.devurandom.read_exact(&mut buf)?; + Ok($t::from_ne_bytes(buf)) + } + } + )* + }; +} + +/// Constructor and primitive integer readers. +/// +/// Each `get_<T>` method reads exactly `size_of::<T>()` bytes from +/// `/dev/urandom` and interprets them in native-endian order, so every bit +/// pattern is equally likely. +impl OsRandom { + /// # Errors + /// Returns any I/O error produced while opening `/dev/urandom`. + pub fn try_new() -> Result<Self, io::Error> { + let devurandom = File::open(DEV_URANDOM)?; + + Ok(Self { devurandom }) + } + + os_random_get_integer_impls!( + u8, u16, u32, u64, u128, usize, i8, i16, i32, i64, i128, isize + ); +} + +/// Uniform sampling helpers. +/// +/// All methods here use rejection sampling on top of [`OsRandom::get_u32`] +/// so the result is exactly uniform — no modulo bias. +impl OsRandom { + /// Returns a uniformly distributed integer in `0..n`. + /// + /// # Panics + /// Panics if `n == 0`. + /// + /// # Errors + /// Returns any I/O error produced while reading from `/dev/urandom`. + pub fn gen_range_u32(&mut self, n: u32) -> io::Result<u32> { + assert!(n > 0, "n must be greater than zero"); + let n64 = u64::from(n); + // Largest multiple of n that fits in 2^32. Values at or above the + // cutoff would skew the modulo, so we discard and resample. + let cutoff = (1u64 << 32) - ((1u64 << 32) % n64); + loop { + let v = u64::from(self.get_u32()?); + if v < cutoff { + return Ok(u32::try_from(v % n64).expect("v % n is bounded by n which fits in u32")); + } + } + } + + /// Returns a uniformly chosen byte from `set`. + /// + /// # Panics + /// Panics if `set` is empty or longer than `u32::MAX`. 
+ /// + /// # Errors + /// Returns any I/O error produced while reading from `/dev/urandom`. + pub fn pick(&mut self, set: &[u8]) -> io::Result<u8> { + assert!(!set.is_empty(), "set must not be empty"); + let n = u32::try_from(set.len()).expect("set must fit in u32"); + let i = usize::try_from(self.gen_range_u32(n)?) + .expect("u32 fits in usize on supported platforms"); + Ok(set[i]) + } + + /// Returns a `String` of `len` characters drawn uniformly from `set`. + /// + /// # Panics + /// Panics if `set` is empty, longer than `u32::MAX`, or contains + /// non-ASCII bytes. + /// + /// # Errors + /// Returns any I/O error produced while reading from `/dev/urandom`. + pub fn string_from(&mut self, set: &[u8], len: usize) -> io::Result<String> { + assert!(set.is_ascii(), "set must contain only ASCII bytes"); + let mut buf = vec![0u8; len]; + for byte in &mut buf { + *byte = self.pick(set)?; + } + Ok(String::from_utf8(buf).expect("ASCII bytes are valid UTF-8")) + } +} + +/// Forwards reads directly to the underlying `/dev/urandom` handle so an +/// `OsRandom` can be used anywhere an [`io::Read`] source is expected. +impl Read for OsRandom { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> { + self.devurandom.read(buf) + } +}