Doge log

Abby CTO 雑賀 力王のオフィシャルサイトです

consistent hashing

30越えて書いたことないなんて恥ずかしいので書いてみた。

#include <inttypes.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#define POINTS_PER_SERVER 100
#define MAX_HOST_LENGTH 128

typedef struct{
    uint32_t index;
    uint32_t value;
} point_item_t;

typedef struct {
    char *name;
    int port;
} server_t;

typedef struct {
    point_item_t *points;
    server_t *server_list;
    uint32_t server_count;
    uint32_t points_count;
} consistent_t;


static consistent_t * 
init()
{
    consistent_t *consistent;
    consistent = malloc(sizeof(consistent_t));
    
    /* MAX 10 server */
    consistent->server_list = malloc(sizeof(server_t) * 10);
    consistent->server_count = 0;
    return consistent;
}

static void
add_server(consistent_t *consistent, char *name, int port)
{

    uint32_t index = 0;
    index = consistent->server_count;
    if(index > 9){
        return;
    }
    consistent->server_list[index].name = name;
    consistent->server_list[index].port = port;

    consistent->server_count++;
    consistent->points_count = POINTS_PER_SERVER * consistent->server_count;
}


uint32_t 
hash_one_at_a_time(const char *key, size_t key_length);

static int 
point_item_cmp(const void *t1, const void *t2)
{
    point_item_t *ct1= (point_item_t *)t1;
    point_item_t *ct2= (point_item_t *)t2;

    if (ct1->value == ct2->value){
        return 0;
    }else if (ct1->value > ct2->value){
        return 1;
    }else{
        return -1;
    }
}

static void
update(consistent_t *consistent)
{
    uint32_t server_count;

    uint32_t point_index = 0;
    uint32_t server_index = 0;
    uint32_t i = 0;
    uint32_t counter= 0;
    uint32_t per_server= POINTS_PER_SERVER;
    uint32_t value;

    server_t *list = consistent->server_list;
    server_count = consistent->server_count;
    if(!consistent->points){ 
        consistent->points = malloc(sizeof(point_item_t) * POINTS_PER_SERVER * server_count);
    }

    
    for (server_index= 0; server_index < server_count; ++server_index) {

        for (i = 1; i <= per_server; i++){

            char host[MAX_HOST_LENGTH]= "";
            size_t host_length;
            
            host_length = (size_t) snprintf(host, MAX_HOST_LENGTH,
                                                  "%s:%u-%u",
                                                  list[server_index].name,
                                                  list[server_index].port,
                                                  i -1);
            value = hash_one_at_a_time(host, host_length);
            
            //printf("%s point %u\n", host, value);
            consistent->points[point_index].index = server_index;
            consistent->points[point_index].value = value;
            point_index++;
        }

    }
    
    //sort
    qsort(consistent->points, point_index, sizeof(point_item_t), point_item_cmp);
}

static server_t 
get_server(consistent_t *consistent, char *key, size_t key_length)
{
    uint32_t hash, index;
    hash = hash_one_at_a_time(key, key_length);
    uint32_t length = consistent->points_count;

    hash = hash;
    point_item_t *begin, *end, *left, *right, *middle;
    begin = left= consistent->points;
    end = right= consistent->points + length;

    while (left < right){
        middle = left + (right - left) / 2;
        if (middle->value < hash){
            left= middle + 1;
        }else{
            right= middle;
        }
    }
    if (right == end){
        right = begin;
    }
    return consistent->server_list[right->index];
}


uint32_t 
hash_one_at_a_time(const char *key, size_t key_length)
{
    const char *ptr= key;
    uint32_t value= 0;

    while (key_length--)
    {
        uint32_t val= (uint32_t) *ptr++;
        value += val;
        value += (value << 10);
        value ^= (value >> 6);
    }
    value += (value << 3);
    value ^= (value >> 11);
    value += (value << 15);

    return value;
}


int 
main()
{
    uint32_t i;
    consistent_t *consistent;
    consistent = init();
    add_server(consistent, "server01", 10001);
    add_server(consistent, "server02", 10002);
    add_server(consistent, "server03", 10003);
    add_server(consistent, "server04", 10004);
    update(consistent);
    
    /*
    for(i = 0; i < consistent->points_count; i++) {
        point_item_t c = consistent->points[i];
        printf("name %s point %u\n", consistent->server_list[c.index].name, c.value);
    }*/
    
    char key[128];
    server_t server;
    for(i = 0; i < 50; i++){
        sprintf(key, "data_key_%d", i);
        server = get_server(consistent, key, (int)strlen(key));
        printf("%s %s\n", key, server.name);
    }
}

最低限のシンプルな構成。
hash値はOne-at-a-Time Hash(jenkins)を使用。
サーバごとに100個のポイントを割り振る。重みづけはなし。

50個のキーで試したけどまあまあふれてると思う。

サーバ数を変えてみて別のキーに影響がないか見てみる。
3番目のサーバを消して走らせてみる。

    add_server(consistent, "server01", 10001);
    add_server(consistent, "server02", 10002);
    //add_server(consistent, "server03", 10003);
    add_server(consistent, "server04", 10004);

4つのものとの差はdiffでみてみた。

1c1< data_key_0 server03

    • -

> data_key_0 server01
4,7c4,7< data_key_3 server03< data_key_4 server03< data_key_5 server03< data_key_6 server03

    • -

> data_key_3 server02
> data_key_4 server02
> data_key_5 server04
> data_key_6 server04
9,10c9,10< data_key_8 server03< data_key_9 server03

    • -

> data_key_8 server02
> data_key_9 server02
16c16< data_key_15 server03

    • -

> data_key_15 server04
19c19< data_key_18 server03

    • -

> data_key_18 server01
28c28< data_key_27 server03

    • -

> data_key_27 server02
40c40< data_key_39 server03

    • -

> data_key_39 server02
44c44< data_key_43 server03

    • -

> data_key_43 server02
48c48< data_key_47 server03

    • -

> data_key_47 server02
50c50< data_key_49 server03

    • -

> data_key_49 server02

ちゃんと削除されてるserver03のキーのみ別のserverに割り当てられてる。
別のキーには影響がない。
まあ当たり前なんだろうけど。