consistent hashing
30越えて書いたことないなんて恥ずかしいので書いてみた。
#include <inttypes.h> #include <stdlib.h> #include <stdio.h> #include <string.h> #define POINTS_PER_SERVER 100 #define MAX_HOST_LENGTH 128 typedef struct{ uint32_t index; uint32_t value; } point_item_t; typedef struct { char *name; int port; } server_t; typedef struct { point_item_t *points; server_t *server_list; uint32_t server_count; uint32_t points_count; } consistent_t; static consistent_t * init() { consistent_t *consistent; consistent = malloc(sizeof(consistent_t)); /* MAX 10 server */ consistent->server_list = malloc(sizeof(server_t) * 10); consistent->server_count = 0; return consistent; } static void add_server(consistent_t *consistent, char *name, int port) { uint32_t index = 0; index = consistent->server_count; if(index > 9){ return; } consistent->server_list[index].name = name; consistent->server_list[index].port = port; consistent->server_count++; consistent->points_count = POINTS_PER_SERVER * consistent->server_count; } uint32_t hash_one_at_a_time(const char *key, size_t key_length); static int point_item_cmp(const void *t1, const void *t2) { point_item_t *ct1= (point_item_t *)t1; point_item_t *ct2= (point_item_t *)t2; if (ct1->value == ct2->value){ return 0; }else if (ct1->value > ct2->value){ return 1; }else{ return -1; } } static void update(consistent_t *consistent) { uint32_t server_count; uint32_t point_index = 0; uint32_t server_index = 0; uint32_t i = 0; uint32_t counter= 0; uint32_t per_server= POINTS_PER_SERVER; uint32_t value; server_t *list = consistent->server_list; server_count = consistent->server_count; if(!consistent->points){ consistent->points = malloc(sizeof(point_item_t) * POINTS_PER_SERVER * server_count); } for (server_index= 0; server_index < server_count; ++server_index) { for (i = 1; i <= per_server; i++){ char host[MAX_HOST_LENGTH]= ""; size_t host_length; host_length = (size_t) snprintf(host, MAX_HOST_LENGTH, "%s:%u-%u", list[server_index].name, list[server_index].port, i -1); value = hash_one_at_a_time(host, host_length); //printf("%s point %u\n", host, value); consistent->points[point_index].index = server_index; consistent->points[point_index].value = value; point_index++; } } //sort qsort(consistent->points, point_index, sizeof(point_item_t), point_item_cmp); } static server_t get_server(consistent_t *consistent, char *key, size_t key_length) { uint32_t hash, index; hash = hash_one_at_a_time(key, key_length); uint32_t length = consistent->points_count; hash = hash; point_item_t *begin, *end, *left, *right, *middle; begin = left= consistent->points; end = right= consistent->points + length; while (left < right){ middle = left + (right - left) / 2; if (middle->value < hash){ left= middle + 1; }else{ right= middle; } } if (right == end){ right = begin; } return consistent->server_list[right->index]; } uint32_t hash_one_at_a_time(const char *key, size_t key_length) { const char *ptr= key; uint32_t value= 0; while (key_length--) { uint32_t val= (uint32_t) *ptr++; value += val; value += (value << 10); value ^= (value >> 6); } value += (value << 3); value ^= (value >> 11); value += (value << 15); return value; } int main() { uint32_t i; consistent_t *consistent; consistent = init(); add_server(consistent, "server01", 10001); add_server(consistent, "server02", 10002); add_server(consistent, "server03", 10003); add_server(consistent, "server04", 10004); update(consistent); /* for(i = 0; i < consistent->points_count; i++) { point_item_t c = consistent->points[i]; printf("name %s point %u\n", consistent->server_list[c.index].name, c.value); }*/ char key[128]; server_t server; for(i = 0; i < 50; i++){ sprintf(key, "data_key_%d", i); server = get_server(consistent, key, (int)strlen(key)); printf("%s %s\n", key, server.name); } }
最低限のシンプルな構成。
hash値はOne-at-a-Time Hash(jenkins)を使用。
サーバごとに100個のポイントを割り振る。重みづけはなし。
50個のキーで試したけどまあまあふれてると思う。
サーバ数を変えてみて別のキーに影響がないか見てみる。
3番目のサーバを消して走らせてみる。
add_server(consistent, "server01", 10001); add_server(consistent, "server02", 10002); //add_server(consistent, "server03", 10003); add_server(consistent, "server04", 10004);
4つのものとの差はdiffでみてみた。
1c1< data_key_0 server03
- -
> data_key_0 server01
4,7c4,7< data_key_3 server03< data_key_4 server03< data_key_5 server03< data_key_6 server03
- -
> data_key_3 server02
> data_key_4 server02
> data_key_5 server04
> data_key_6 server04
9,10c9,10< data_key_8 server03< data_key_9 server03
- -
> data_key_8 server02
> data_key_9 server02
16c16< data_key_15 server03
- -
> data_key_15 server04
19c19< data_key_18 server03
- -
> data_key_18 server01
28c28< data_key_27 server03
- -
> data_key_27 server02
40c40< data_key_39 server03
- -
> data_key_39 server02
44c44< data_key_43 server03
- -
> data_key_43 server02
48c48< data_key_47 server03
- -
> data_key_47 server02
50c50< data_key_49 server03
- -
> data_key_49 server02
ちゃんと削除されてるserver03のキーのみ別のserverに割り当てられてる。
別のキーには影響がない。
まあ当たり前なんだろうけど。