// Copyright 2020 Joshua J Baker. All rights reserved.
// Use of this source code is governed by an MIT-style
// license that can be found in the LICENSE file.

#include "core/hashmap.h"

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void *(*_malloc)(size_t) = NULL;
static void *(*_realloc)(void *, size_t) = NULL;
static void (*_free)(void *) = NULL;

// hashmap_set_allocator allows for configuring a custom allocator for
// all hashmap library operations. This function, if needed, should be called
// only once at startup and a prior to calling hashmap_new().
void hashmap_set_allocator(void *(*malloc)(size_t), void (*free)(void *))
{
    _malloc = malloc;
    _free = free;
}

#define panic(_msg_)                                                                               \
    {                                                                                              \
        fprintf(stderr, "panic: %s (%s:%d)\n", (_msg_), __FILE__, __LINE__);                       \
        exit(1);                                                                                   \
    }

struct bucket
{
    uint64_t hash : 48;
    uint64_t dib : 16;
};

// hashmap is an open addressed hash map using robinhood hashing.
struct hashmap
{
    void *(*malloc)(size_t);
    void *(*realloc)(void *, size_t);
    void (*free)(void *);
    bool oom;
    size_t elsize;
    size_t cap;
    uint64_t seed0;
    uint64_t seed1;
    uint64_t (*hash)(const void *item, uint64_t seed0, uint64_t seed1);
    int (*compare)(const void *a, const void *b, void *udata);
    void (*elfree)(void *item);
    void *udata;
    size_t bucketsz;
    size_t nbuckets;
    size_t count;
    size_t mask;
    size_t growat;
    size_t shrinkat;
    void *buckets;
    void *spare;
    void *edata;
};

static struct bucket *bucket_at(struct hashmap *map, size_t index)
{
    return (struct bucket *)(((char *)map->buckets) + (map->bucketsz * index));
}

static void *bucket_item(struct bucket *entry)
{
    return ((char *)entry) + sizeof(struct bucket);
}

static uint64_t get_hash(struct hashmap *map, const void *key)
{
    return map->hash(key, map->seed0, map->seed1) << 16 >> 16;
}

// hashmap_new_with_allocator returns a new hash map using a custom allocator.
// See hashmap_new for more information information
struct hashmap *
hashmap_new_with_allocator(void *(*_malloc)(size_t), void *(*_realloc)(void *, size_t),
                           void (*_free)(void *), size_t elsize, size_t cap, uint64_t seed0,
                           uint64_t seed1,
                           uint64_t (*hash)(const void *item, uint64_t seed0, uint64_t seed1),
                           int (*compare)(const void *a, const void *b, void *udata),
                           void (*elfree)(void *item), void *udata)
{
    _malloc = _malloc ? _malloc : malloc;
    _realloc = _realloc ? _realloc : realloc;
    _free = _free ? _free : free;
    int ncap = 16;
    if (cap < ncap) {
        cap = ncap;
    } else {
        while (ncap < cap) {
            ncap *= 2;
        }
        cap = ncap;
    }
    size_t bucketsz = sizeof(struct bucket) + elsize;
    while (bucketsz & (sizeof(uintptr_t) - 1)) {
        bucketsz++;
    }
    // hashmap + spare + edata
    size_t size = sizeof(struct hashmap) + bucketsz * 2;
    struct hashmap *map = _malloc(size);
    if (!map) {
        return NULL;
    }
    memset(map, 0, sizeof(struct hashmap));
    map->elsize = elsize;
    map->bucketsz = bucketsz;
    map->seed0 = seed0;
    map->seed1 = seed1;
    map->hash = hash;
    map->compare = compare;
    map->elfree = elfree;
    map->udata = udata;
    map->spare = ((char *)map) + sizeof(struct hashmap);
    map->edata = (char *)map->spare + bucketsz;
    map->cap = cap;
    map->nbuckets = cap;
    map->mask = map->nbuckets - 1;
    map->buckets = _malloc(map->bucketsz * map->nbuckets);
    if (!map->buckets) {
        _free(map);
        return NULL;
    }
    memset(map->buckets, 0, map->bucketsz * map->nbuckets);
    map->growat = map->nbuckets * 0.75;
    map->shrinkat = map->nbuckets * 0.10;
    map->malloc = _malloc;
    map->realloc = _realloc;
    map->free = _free;
    return map;
}

// hashmap_new returns a new hash map.
// Param `elsize` is the size of each element in the tree. Every element that
// is inserted, deleted, or retrieved will be this size.
// Param `cap` is the default lower capacity of the hashmap. Setting this to
// zero will default to 16.
// Params `seed0` and `seed1` are optional seed values that are passed to the
// following `hash` function. These can be any value you wish but it's often
// best to use randomly generated values.
// Param `hash` is a function that generates a hash value for an item. It's
// important that you provide a good hash function, otherwise it will perform
// poorly or be vulnerable to Denial-of-service attacks. This implementation
// comes with two helper functions `hashmap_sip()` and `hashmap_murmur()`.
// Param `compare` is a function that compares items in the tree. See the
// qsort stdlib function for an example of how this function works.
// The hashmap must be freed with hashmap_free().
// Param `elfree` is a function that frees a specific item. This should be NULL
// unless you're storing some kind of reference data in the hash.
struct hashmap *hashmap_new(size_t elsize, size_t cap, uint64_t seed0, uint64_t seed1,
                            uint64_t (*hash)(const void *item, uint64_t seed0, uint64_t seed1),
                            int (*compare)(const void *a, const void *b, void *udata),
                            void (*elfree)(void *item), void *udata)
{
    return hashmap_new_with_allocator((_malloc ? _malloc : malloc), (_realloc ? _realloc : realloc),
                                      (_free ? _free : free), elsize, cap, seed0, seed1, hash,
                                      compare, elfree, udata);
}

static void free_elements(struct hashmap *map)
{
    if (map->elfree) {
        for (size_t i = 0; i < map->nbuckets; i++) {
            struct bucket *bucket = bucket_at(map, i);
            if (bucket->dib)
                map->elfree(bucket_item(bucket));
        }
    }
}

// hashmap_clear quickly clears the map.
// Every item is called with the element-freeing function given in hashmap_new,
// if present, to free any data referenced in the elements of the hashmap.
// When the update_cap is provided, the map's capacity will be updated to match
// the currently number of allocated buckets. This is an optimization to ensure
// that this operation does not perform any allocations.
void hashmap_clear(struct hashmap *map, bool update_cap)
{
    map->count = 0;
    free_elements(map);
    if (update_cap) {
        map->cap = map->nbuckets;
    } else if (map->nbuckets != map->cap) {
        void *new_buckets = map->malloc(map->bucketsz * map->cap);
        if (new_buckets) {
            map->free(map->buckets);
            map->buckets = new_buckets;
        }
        map->nbuckets = map->cap;
    }
    memset(map->buckets, 0, map->bucketsz * map->nbuckets);
    map->mask = map->nbuckets - 1;
    map->growat = map->nbuckets * 0.75;
    map->shrinkat = map->nbuckets * 0.10;
}

static bool resize(struct hashmap *map, size_t new_cap)
{
    struct hashmap *map2 = hashmap_new(map->elsize, new_cap, map->seed1, map->seed1, map->hash,
                                       map->compare, map->elfree, map->udata);
    if (!map2) {
        return false;
    }
    for (size_t i = 0; i < map->nbuckets; i++) {
        struct bucket *entry = bucket_at(map, i);
        if (!entry->dib) {
            continue;
        }
        entry->dib = 1;
        size_t j = entry->hash & map2->mask;
        for (;;) {
            struct bucket *bucket = bucket_at(map2, j);
            if (bucket->dib == 0) {
                memcpy(bucket, entry, map->bucketsz);
                break;
            }
            if (bucket->dib < entry->dib) {
                memcpy(map2->spare, bucket, map->bucketsz);
                memcpy(bucket, entry, map->bucketsz);
                memcpy(entry, map2->spare, map->bucketsz);
            }
            j = (j + 1) & map2->mask;
            entry->dib += 1;
        }
    }
    map->free(map->buckets);
    map->buckets = map2->buckets;
    map->nbuckets = map2->nbuckets;
    map->mask = map2->mask;
    map->growat = map2->growat;
    map->shrinkat = map2->shrinkat;
    map->free(map2);
    return true;
}

// hashmap_set inserts or replaces an item in the hash map. If an item is
// replaced then it is returned otherwise NULL is returned. This operation
// may allocate memory. If the system is unable to allocate additional
// memory then NULL is returned and hashmap_oom() returns true.
void *hashmap_set(struct hashmap *map, const void *item)
{
    if (!item) {
        panic("item is null");
    }
    map->oom = false;
    if (map->count == map->growat) {
        if (!resize(map, map->nbuckets * 2)) {
            map->oom = true;
            return NULL;
        }
    }

    struct bucket *entry = map->edata;
    entry->hash = get_hash(map, item);
    entry->dib = 1;
    memcpy(bucket_item(entry), item, map->elsize);

    size_t i = entry->hash & map->mask;
    for (;;) {
        struct bucket *bucket = bucket_at(map, i);
        if (bucket->dib == 0) {
            memcpy(bucket, entry, map->bucketsz);
            map->count++;
            return NULL;
        }
        if (entry->hash == bucket->hash &&
            map->compare(bucket_item(entry), bucket_item(bucket), map->udata) == 0) {
            memcpy(map->spare, bucket_item(bucket), map->elsize);
            memcpy(bucket_item(bucket), bucket_item(entry), map->elsize);
            return map->spare;
        }
        if (bucket->dib < entry->dib) {
            memcpy(map->spare, bucket, map->bucketsz);
            memcpy(bucket, entry, map->bucketsz);
            memcpy(entry, map->spare, map->bucketsz);
        }
        i = (i + 1) & map->mask;
        entry->dib += 1;
    }
}

// hashmap_get returns the item based on the provided key. If the item is not
// found then NULL is returned.
void *hashmap_get(struct hashmap *map, const void *key)
{
    if (!key) {
        panic("key is null");
    }
    uint64_t hash = get_hash(map, key);
    size_t i = hash & map->mask;
    for (;;) {
        struct bucket *bucket = bucket_at(map, i);
        if (!bucket->dib) {
            return NULL;
        }
        if (bucket->hash == hash && map->compare(key, bucket_item(bucket), map->udata) == 0) {
            return bucket_item(bucket);
        }
        i = (i + 1) & map->mask;
    }
}

// hashmap_probe returns the item in the bucket at position or NULL if an item
// is not set for that bucket. The position is 'moduloed' by the number of
// buckets in the hashmap.
void *hashmap_probe(struct hashmap *map, uint64_t position)
{
    size_t i = position & map->mask;
    struct bucket *bucket = bucket_at(map, i);
    if (!bucket->dib) {
        return NULL;
    }
    return bucket_item(bucket);
}

// hashmap_delete removes an item from the hash map and returns it. If the
// item is not found then NULL is returned.
void *hashmap_delete(struct hashmap *map, void *key)
{
    if (!key) {
        panic("key is null");
    }
    map->oom = false;
    uint64_t hash = get_hash(map, key);
    size_t i = hash & map->mask;
    for (;;) {
        struct bucket *bucket = bucket_at(map, i);
        if (!bucket->dib) {
            return NULL;
        }
        if (bucket->hash == hash && map->compare(key, bucket_item(bucket), map->udata) == 0) {
            memcpy(map->spare, bucket_item(bucket), map->elsize);
            bucket->dib = 0;
            for (;;) {
                struct bucket *prev = bucket;
                i = (i + 1) & map->mask;
                bucket = bucket_at(map, i);
                if (bucket->dib <= 1) {
                    prev->dib = 0;
                    break;
                }
                memcpy(prev, bucket, map->bucketsz);
                prev->dib--;
            }
            map->count--;
            if (map->nbuckets > map->cap && map->count <= map->shrinkat) {
                // Ignore the return value. It's ok for the resize operation to
                // fail to allocate enough memory because a shrink operation
                // does not change the integrity of the data.
                resize(map, map->nbuckets / 2);
            }
            return map->spare;
        }
        i = (i + 1) & map->mask;
    }
}

// hashmap_count returns the number of items in the hash map.
size_t hashmap_count(struct hashmap *map)
{
    return map->count;
}

// hashmap_free frees the hash map
// Every item is called with the element-freeing function given in hashmap_new,
// if present, to free any data referenced in the elements of the hashmap.
void hashmap_free(struct hashmap *map)
{
    if (!map)
        return;
    free_elements(map);
    map->free(map->buckets);
    map->free(map);
}

// hashmap_oom returns true if the last hashmap_set() call failed due to the
// system being out of memory.
bool hashmap_oom(struct hashmap *map)
{
    return map->oom;
}

// hashmap_scan iterates over all items in the hash map
// Param `iter` can return false to stop iteration early.
// Returns false if the iteration has been stopped early.
bool hashmap_scan(struct hashmap *map, bool (*iter)(const void *item, void *udata), void *udata)
{
    for (size_t i = 0; i < map->nbuckets; i++) {
        struct bucket *bucket = bucket_at(map, i);
        if (bucket->dib) {
            if (!iter(bucket_item(bucket), udata)) {
                return false;
            }
        }
    }
    return true;
}

// hashmap_iter iterates one key at a time yielding a reference to an
// entry at each iteration. Useful to write simple loops and avoid writing
// dedicated callbacks and udata structures, as in hashmap_scan.
//
// map is a hash map handle. i is a pointer to a size_t cursor that
// should be initialized to 0 at the beginning of the loop. item is a void
// pointer pointer that is populated with the retrieved item. Note that this
// is NOT a copy of the item stored in the hash map and can be directly
// modified.
//
// Note that if hashmap_delete() is called on the hashmap being iterated,
// the buckets are rearranged and the iterator must be reset to 0, otherwise
// unexpected results may be returned after deletion.
//
// This function has not been tested for thread safety.
//
// The function returns true if an item was retrieved; false if the end of the
// iteration has been reached.
bool hashmap_iter(struct hashmap *map, size_t *i, void **item)
{
    struct bucket *bucket;

    do {
        if (*i >= map->nbuckets)
            return false;

        bucket = bucket_at(map, *i);
        (*i)++;
    } while (!bucket->dib);

    *item = bucket_item(bucket);

    return true;
}

//-----------------------------------------------------------------------------
// SipHash reference C implementation
//
// Copyright (c) 2012-2016 Jean-Philippe Aumasson
// <jeanphilippe.aumasson@gmail.com>
// Copyright (c) 2012-2014 Daniel J. Bernstein <djb@cr.yp.to>
//
// To the extent possible under law, the author(s) have dedicated all copyright
// and related and neighboring rights to this software to the public domain
// worldwide. This software is distributed without any warranty.
//
// You should have received a copy of the CC0 Public Domain Dedication along
// with this software. If not, see
// <http://creativecommons.org/publicdomain/zero/1.0/>.
//
// default: SipHash-2-4
//-----------------------------------------------------------------------------
static uint64_t SIP64(const uint8_t *in, const size_t inlen, uint64_t seed0, uint64_t seed1)
{
#define U8TO64_LE(p)                                                                               \
    {(((uint64_t)((p)[0])) | ((uint64_t)((p)[1]) << 8) | ((uint64_t)((p)[2]) << 16) |              \
      ((uint64_t)((p)[3]) << 24) | ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) |       \
      ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56))}
#define U64TO8_LE(p, v)                                                                            \
    {                                                                                              \
        U32TO8_LE((p), (uint32_t)((v)));                                                           \
        U32TO8_LE((p) + 4, (uint32_t)((v) >> 32));                                                 \
    }
#define U32TO8_LE(p, v)                                                                            \
    {                                                                                              \
        (p)[0] = (uint8_t)((v));                                                                   \
        (p)[1] = (uint8_t)((v) >> 8);                                                              \
        (p)[2] = (uint8_t)((v) >> 16);                                                             \
        (p)[3] = (uint8_t)((v) >> 24);                                                             \
    }
#define ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b))))
#define SIPROUND                                                                                   \
    {                                                                                              \
        v0 += v1;                                                                                  \
        v1 = ROTL(v1, 13);                                                                         \
        v1 ^= v0;                                                                                  \
        v0 = ROTL(v0, 32);                                                                         \
        v2 += v3;                                                                                  \
        v3 = ROTL(v3, 16);                                                                         \
        v3 ^= v2;                                                                                  \
        v0 += v3;                                                                                  \
        v3 = ROTL(v3, 21);                                                                         \
        v3 ^= v0;                                                                                  \
        v2 += v1;                                                                                  \
        v1 = ROTL(v1, 17);                                                                         \
        v1 ^= v2;                                                                                  \
        v2 = ROTL(v2, 32);                                                                         \
    }
    uint64_t k0 = U8TO64_LE((uint8_t *)&seed0);
    uint64_t k1 = U8TO64_LE((uint8_t *)&seed1);
    uint64_t v3 = UINT64_C(0x7465646279746573) ^ k1;
    uint64_t v2 = UINT64_C(0x6c7967656e657261) ^ k0;
    uint64_t v1 = UINT64_C(0x646f72616e646f6d) ^ k1;
    uint64_t v0 = UINT64_C(0x736f6d6570736575) ^ k0;
    const uint8_t *end = in + inlen - (inlen % sizeof(uint64_t));
    for (; in != end; in += 8) {
        uint64_t m = U8TO64_LE(in);
        v3 ^= m;
        SIPROUND;
        SIPROUND;
        v0 ^= m;
    }
    const int left = inlen & 7;
    uint64_t b = ((uint64_t)inlen) << 56;
    switch (left) {
    case 7:
        b |= ((uint64_t)in[6]) << 48;
    case 6:
        b |= ((uint64_t)in[5]) << 40;
    case 5:
        b |= ((uint64_t)in[4]) << 32;
    case 4:
        b |= ((uint64_t)in[3]) << 24;
    case 3:
        b |= ((uint64_t)in[2]) << 16;
    case 2:
        b |= ((uint64_t)in[1]) << 8;
    case 1:
        b |= ((uint64_t)in[0]);
        break;
    case 0:
        break;
    }
    v3 ^= b;
    SIPROUND;
    SIPROUND;
    v0 ^= b;
    v2 ^= 0xff;
    SIPROUND;
    SIPROUND;
    SIPROUND;
    SIPROUND;
    b = v0 ^ v1 ^ v2 ^ v3;
    uint64_t out = 0;
    U64TO8_LE((uint8_t *)&out, b);
    return out;
}

//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
//
// Murmur3_86_128
//-----------------------------------------------------------------------------
static void MM86128(const void *key, const int len, uint32_t seed, void *out)
{
#define ROTL32(x, r) ((x << r) | (x >> (32 - r)))
#define FMIX32(h)                                                                                  \
    h ^= h >> 16;                                                                                  \
    h *= 0x85ebca6b;                                                                               \
    h ^= h >> 13;                                                                                  \
    h *= 0xc2b2ae35;                                                                               \
    h ^= h >> 16;
    const uint8_t *data = (const uint8_t *)key;
    const int nblocks = len / 16;
    uint32_t h1 = seed;
    uint32_t h2 = seed;
    uint32_t h3 = seed;
    uint32_t h4 = seed;
    uint32_t c1 = 0x239b961b;
    uint32_t c2 = 0xab0e9789;
    uint32_t c3 = 0x38b34ae5;
    uint32_t c4 = 0xa1e38b93;
    const uint32_t *blocks = (const uint32_t *)(data + nblocks * 16);
    for (int i = -nblocks; i; i++) {
        uint32_t k1 = blocks[i * 4 + 0];
        uint32_t k2 = blocks[i * 4 + 1];
        uint32_t k3 = blocks[i * 4 + 2];
        uint32_t k4 = blocks[i * 4 + 3];
        k1 *= c1;
        k1 = ROTL32(k1, 15);
        k1 *= c2;
        h1 ^= k1;
        h1 = ROTL32(h1, 19);
        h1 += h2;
        h1 = h1 * 5 + 0x561ccd1b;
        k2 *= c2;
        k2 = ROTL32(k2, 16);
        k2 *= c3;
        h2 ^= k2;
        h2 = ROTL32(h2, 17);
        h2 += h3;
        h2 = h2 * 5 + 0x0bcaa747;
        k3 *= c3;
        k3 = ROTL32(k3, 17);
        k3 *= c4;
        h3 ^= k3;
        h3 = ROTL32(h3, 15);
        h3 += h4;
        h3 = h3 * 5 + 0x96cd1c35;
        k4 *= c4;
        k4 = ROTL32(k4, 18);
        k4 *= c1;
        h4 ^= k4;
        h4 = ROTL32(h4, 13);
        h4 += h1;
        h4 = h4 * 5 + 0x32ac3b17;
    }
    const uint8_t *tail = (const uint8_t *)(data + nblocks * 16);
    uint32_t k1 = 0;
    uint32_t k2 = 0;
    uint32_t k3 = 0;
    uint32_t k4 = 0;
    switch (len & 15) {
    case 15:
        k4 ^= tail[14] << 16;
    case 14:
        k4 ^= tail[13] << 8;
    case 13:
        k4 ^= tail[12] << 0;
        k4 *= c4;
        k4 = ROTL32(k4, 18);
        k4 *= c1;
        h4 ^= k4;
    case 12:
        k3 ^= tail[11] << 24;
    case 11:
        k3 ^= tail[10] << 16;
    case 10:
        k3 ^= tail[9] << 8;
    case 9:
        k3 ^= tail[8] << 0;
        k3 *= c3;
        k3 = ROTL32(k3, 17);
        k3 *= c4;
        h3 ^= k3;
    case 8:
        k2 ^= tail[7] << 24;
    case 7:
        k2 ^= tail[6] << 16;
    case 6:
        k2 ^= tail[5] << 8;
    case 5:
        k2 ^= tail[4] << 0;
        k2 *= c2;
        k2 = ROTL32(k2, 16);
        k2 *= c3;
        h2 ^= k2;
    case 4:
        k1 ^= tail[3] << 24;
    case 3:
        k1 ^= tail[2] << 16;
    case 2:
        k1 ^= tail[1] << 8;
    case 1:
        k1 ^= tail[0] << 0;
        k1 *= c1;
        k1 = ROTL32(k1, 15);
        k1 *= c2;
        h1 ^= k1;
    };
    h1 ^= len;
    h2 ^= len;
    h3 ^= len;
    h4 ^= len;
    h1 += h2;
    h1 += h3;
    h1 += h4;
    h2 += h1;
    h3 += h1;
    h4 += h1;
    FMIX32(h1);
    FMIX32(h2);
    FMIX32(h3);
    FMIX32(h4);
    h1 += h2;
    h1 += h3;
    h1 += h4;
    h2 += h1;
    h3 += h1;
    h4 += h1;
    ((uint32_t *)out)[0] = h1;
    ((uint32_t *)out)[1] = h2;
    ((uint32_t *)out)[2] = h3;
    ((uint32_t *)out)[3] = h4;
}

// hashmap_sip returns a hash value for `data` using SipHash-2-4.
uint64_t hashmap_sip(const void *data, size_t len, uint64_t seed0, uint64_t seed1)
{
    return SIP64((uint8_t *)data, len, seed0, seed1);
}

// hashmap_murmur returns a hash value for `data` using Murmur3_86_128.
uint64_t hashmap_murmur(const void *data, size_t len, uint64_t seed0, uint64_t seed1)
{
    char out[16];
    MM86128(data, len, seed0, &out);
    return *(uint64_t *)out;
}

//==============================================================================
// TESTS AND BENCHMARKS
// $ cc -DHASHMAP_TEST hashmap.c && ./a.out              # run tests
// $ cc -DHASHMAP_TEST -O3 hashmap.c && BENCH=1 ./a.out  # run benchmarks
//==============================================================================
#ifdef HASHMAP_TEST

static size_t deepcount(struct hashmap *map)
{
    size_t count = 0;
    for (size_t i = 0; i < map->nbuckets; i++) {
        if (bucket_at(map, i)->dib) {
            count++;
        }
    }
    return count;
}

#    pragma GCC diagnostic ignored "-Wextra"

#    include "core/hashmap.h"

#    include <assert.h>
#    include <stdio.h>
#    include <stdlib.h>
#    include <string.h>
#    include <time.h>

static bool rand_alloc_fail = false;
static int rand_alloc_fail_odds = 3; // 1 in 3 chance malloc will fail.
static uintptr_t total_allocs = 0;
static uintptr_t total_mem = 0;

static void *xmalloc(size_t size)
{
    if (rand_alloc_fail && rand() % rand_alloc_fail_odds == 0) {
        return NULL;
    }
    void *mem = malloc(sizeof(uintptr_t) + size);
    assert(mem);
    *(uintptr_t *)mem = size;
    total_allocs++;
    total_mem += size;
    return (char *)mem + sizeof(uintptr_t);
}

static void xfree(void *ptr)
{
    if (ptr) {
        total_mem -= *(uintptr_t *)((char *)ptr - sizeof(uintptr_t));
        free((char *)ptr - sizeof(uintptr_t));
        total_allocs--;
    }
}

static void shuffle(void *array, size_t numels, size_t elsize)
{
    char tmp[elsize];
    char *arr = array;
    for (size_t i = 0; i < numels - 1; i++) {
        int j = i + rand() / (RAND_MAX / (numels - i) + 1);
        memcpy(tmp, arr + j * elsize, elsize);
        memcpy(arr + j * elsize, arr + i * elsize, elsize);
        memcpy(arr + i * elsize, tmp, elsize);
    }
}

static bool iter_ints(const void *item, void *udata)
{
    int *vals = *(int **)udata;
    vals[*(int *)item] = 1;
    return true;
}

static int compare_ints(const void *a, const void *b)
{
    return *(int *)a - *(int *)b;
}

static int compare_ints_udata(const void *a, const void *b, void *udata)
{
    return *(int *)a - *(int *)b;
}

static int compare_strs(const void *a, const void *b, void *udata)
{
    return strcmp(*(char **)a, *(char **)b);
}

static uint64_t hash_int(const void *item, uint64_t seed0, uint64_t seed1)
{
    return hashmap_murmur(item, sizeof(int), seed0, seed1);
}

static uint64_t hash_str(const void *item, uint64_t seed0, uint64_t seed1)
{
    return hashmap_murmur(*(char **)item, strlen(*(char **)item), seed0, seed1);
}

static void free_str(void *item)
{
    xfree(*(char **)item);
}

static void all()
{
    int seed = getenv("SEED") ? atoi(getenv("SEED")) : time(NULL);
    int N = getenv("N") ? atoi(getenv("N")) : 2000;
    printf("seed=%d, count=%d, item_size=%zu\n", seed, N, sizeof(int));
    srand(seed);

    rand_alloc_fail = true;

    // test sip and murmur hashes
    assert(hashmap_sip("hello", 5, 1, 2) == 2957200328589801622);
    assert(hashmap_murmur("hello", 5, 1, 2) == 1682575153221130884);

    int *vals;
    while (!(vals = xmalloc(N * sizeof(int)))) {
    }
    for (int i = 0; i < N; i++) {
        vals[i] = i;
    }

    struct hashmap *map;

    while (!(
        map = hashmap_new(sizeof(int), 0, seed, seed, hash_int, compare_ints_udata, NULL, NULL))) {
    }
    shuffle(vals, N, sizeof(int));
    for (int i = 0; i < N; i++) {
        // // printf("== %d ==\n", vals[i]);
        assert(map->count == i);
        assert(map->count == hashmap_count(map));
        assert(map->count == deepcount(map));
        int *v;
        assert(!hashmap_get(map, &vals[i]));
        assert(!hashmap_delete(map, &vals[i]));
        while (true) {
            assert(!hashmap_set(map, &vals[i]));
            if (!hashmap_oom(map)) {
                break;
            }
        }

        for (int j = 0; j < i; j++) {
            v = hashmap_get(map, &vals[j]);
            assert(v && *v == vals[j]);
        }
        while (true) {
            v = hashmap_set(map, &vals[i]);
            if (!v) {
                assert(hashmap_oom(map));
                continue;
            } else {
                assert(!hashmap_oom(map));
                assert(v && *v == vals[i]);
                break;
            }
        }
        v = hashmap_get(map, &vals[i]);
        assert(v && *v == vals[i]);
        v = hashmap_delete(map, &vals[i]);
        assert(v && *v == vals[i]);
        assert(!hashmap_get(map, &vals[i]));
        assert(!hashmap_delete(map, &vals[i]));
        assert(!hashmap_set(map, &vals[i]));
        assert(map->count == i + 1);
        assert(map->count == hashmap_count(map));
        assert(map->count == deepcount(map));
    }

    int *vals2;
    while (!(vals2 = xmalloc(N * sizeof(int)))) {
    }
    memset(vals2, 0, N * sizeof(int));
    assert(hashmap_scan(map, iter_ints, &vals2));

    // Test hashmap_iter. This does the same as hashmap_scan above.
    size_t iter = 0;
    void *iter_val;
    while (hashmap_iter(map, &iter, &iter_val)) {
        assert(iter_ints(iter_val, &vals2));
    }
    for (int i = 0; i < N; i++) {
        assert(vals2[i] == 1);
    }
    xfree(vals2);

    shuffle(vals, N, sizeof(int));
    for (int i = 0; i < N; i++) {
        int *v;
        v = hashmap_delete(map, &vals[i]);
        assert(v && *v == vals[i]);
        assert(!hashmap_get(map, &vals[i]));
        assert(map->count == N - i - 1);
        assert(map->count == hashmap_count(map));
        assert(map->count == deepcount(map));
        for (int j = N - 1; j > i; j--) {
            v = hashmap_get(map, &vals[j]);
            assert(v && *v == vals[j]);
        }
    }

    for (int i = 0; i < N; i++) {
        while (true) {
            assert(!hashmap_set(map, &vals[i]));
            if (!hashmap_oom(map)) {
                break;
            }
        }
    }

    assert(map->count != 0);
    size_t prev_cap = map->cap;
    hashmap_clear(map, true);
    assert(prev_cap < map->cap);
    assert(map->count == 0);

    for (int i = 0; i < N; i++) {
        while (true) {
            assert(!hashmap_set(map, &vals[i]));
            if (!hashmap_oom(map)) {
                break;
            }
        }
    }

    prev_cap = map->cap;
    hashmap_clear(map, false);
    assert(prev_cap == map->cap);

    hashmap_free(map);

    xfree(vals);

    while (
        !(map = hashmap_new(sizeof(char *), 0, seed, seed, hash_str, compare_strs, free_str, NULL)))
        ;

    for (int i = 0; i < N; i++) {
        char *str;
        while (!(str = xmalloc(16)))
            ;
        sprintf(str, "s%i", i);
        while (!hashmap_set(map, &str))
            ;
    }

    hashmap_clear(map, false);
    assert(hashmap_count(map) == 0);

    for (int i = 0; i < N; i++) {
        char *str;
        while (!(str = xmalloc(16)))
            ;
        sprintf(str, "s%i", i);
        while (!hashmap_set(map, &str))
            ;
    }

    hashmap_free(map);

    if (total_allocs != 0) {
        fprintf(stderr, "total_allocs: expected 0, got %lu\n", total_allocs);
        exit(1);
    }
}

#    define bench(name, N, code)                                                                   \
        {                                                                                          \
            {                                                                                      \
                if (strlen(name) > 0) {                                                            \
                    printf("%-14s ", name);                                                        \
                }                                                                                  \
                size_t tmem = total_mem;                                                           \
                size_t tallocs = total_allocs;                                                     \
                uint64_t bytes = 0;                                                                \
                clock_t begin = clock();                                                           \
                for (int i = 0; i < N; i++) {                                                      \
                    (code);                                                                        \
                }                                                                                  \
                clock_t end = clock();                                                             \
                double elapsed_secs = (double)(end - begin) / CLOCKS_PER_SEC;                      \
                double bytes_sec = (double)bytes / elapsed_secs;                                   \
                printf("%d ops in %.3f secs, %.0f ns/op, %.0f op/sec", N, elapsed_secs,            \
                       elapsed_secs / (double)N * 1e9, (double)N / elapsed_secs);                  \
                if (bytes > 0) {                                                                   \
                    printf(", %.1f GB/sec", bytes_sec / 1024 / 1024 / 1024);                       \
                }                                                                                  \
                if (total_mem > tmem) {                                                            \
                    size_t used_mem = total_mem - tmem;                                            \
                    printf(", %.2f bytes/op", (double)used_mem / N);                               \
                }                                                                                  \
                if (total_allocs > tallocs) {                                                      \
                    size_t used_allocs = total_allocs - tallocs;                                   \
                    printf(", %.2f allocs/op", (double)used_allocs / N);                           \
                }                                                                                  \
                printf("\n");                                                                      \
            }                                                                                      \
        }

static void benchmarks()
{
    int seed = getenv("SEED") ? atoi(getenv("SEED")) : time(NULL);
    int N = getenv("N") ? atoi(getenv("N")) : 5000000;
    printf("seed=%d, count=%d, item_size=%zu\n", seed, N, sizeof(int));
    srand(seed);

    int *vals = xmalloc(N * sizeof(int));
    for (int i = 0; i < N; i++) {
        vals[i] = i;
    }

    shuffle(vals, N, sizeof(int));

    struct hashmap *map;
    shuffle(vals, N, sizeof(int));

    map = hashmap_new(sizeof(int), 0, seed, seed, hash_int, compare_ints_udata, NULL, NULL);
    bench("set", N, {
        int *v = hashmap_set(map, &vals[i]);
        assert(!v);
    }) shuffle(vals, N, sizeof(int));
    bench("get", N, {
        int *v = hashmap_get(map, &vals[i]);
        assert(v && *v == vals[i]);
    }) shuffle(vals, N, sizeof(int));
    bench("delete", N, {
        int *v = hashmap_delete(map, &vals[i]);
        assert(v && *v == vals[i]);
    }) hashmap_free(map);

    map = hashmap_new(sizeof(int), N, seed, seed, hash_int, compare_ints_udata, NULL, NULL);
    bench("set (cap)", N, {
        int *v = hashmap_set(map, &vals[i]);
        assert(!v);
    }) shuffle(vals, N, sizeof(int));
    bench("get (cap)", N, {
        int *v = hashmap_get(map, &vals[i]);
        assert(v && *v == vals[i]);
    }) shuffle(vals, N, sizeof(int));
    bench("delete (cap)", N,
          {
              int *v = hashmap_delete(map, &vals[i]);
              assert(v && *v == vals[i]);
          })

        hashmap_free(map);

    xfree(vals);

    if (total_allocs != 0) {
        fprintf(stderr, "total_allocs: expected 0, got %lu\n", total_allocs);
        exit(1);
    }
}

int main()
{
    hashmap_set_allocator(xmalloc, xfree);

    if (getenv("BENCH")) {
        printf("Running hashmap.c benchmarks...\n");
        benchmarks();
    } else {
        printf("Running hashmap.c tests...\n");
        all();
        printf("PASSED\n");
    }
}

#endif