class,module関数
http://d.hatena.ne.jp/naoya/20090506/canonical_huffman_codesを見て。
id:naoyaさんのコードに対するツッコミはid:uemuの仕事だと思ってるけどちょっとだけいじってみた。
メソッドにする必要のないものってモジュールの関数にするんじゃないかなあ。
inner class的なものもあんまし使わないかもなあ。
あとあんましよくわかってないのでinit部のアルゴリズムのところは全く書き直してない。
# -*- coding: utf-8 -*- def _min_heapfy (a, i, h): l = 2 * i + 1 r = 2 * i + 2 if l < h and a[a[l]] < a[a[i]]: mi = l else: mi = i if r < h and a[a[r]] < a[a[mi]]: mi = r if mi != i: tmp = a[i] a[i] = a[mi] a[mi] = tmp _min_heapfy(a, mi, h) def _build_min_heap (a, h): for i in xrange((h - 1)/2, -1, -1): _min_heapfy(a, i, h) class CHC(object): def __init__(self, count): ## Phase One: Create an array A of 2n words n = len(count) A = [None] * (2 * n) for i in xrange(n): A[n + i] = count[i] A[i] = n + i _build_min_heap(A, n) ## Phase Two: Building Huffman tree h = n while h - 1 > 0: m1 = A[0] A[0] = A[h - 1] h -= 1 _min_heapfy(A, 0, h) m2 = A[0] A[h] = A[m1] + A[m2] A[0] = h A[m1] = A[m2] = h _min_heapfy(A, 0, h) ## Phase Three: Counting of leaf depths A[1] = 0 for i in xrange(3, 2 * n): A[i] = A[A[i]] + 1 ## codelen codelen = [None] * n for i in xrange(n): codelen[i] = A[n + i] maxlength = max(codelen) ## numl numl = [0] * (maxlength + 1) for i in xrange(n): numl[ codelen[i] ] += 1 ## firstcode firstcode = [0] * (maxlength + 1) for l in xrange(maxlength - 1, 0, -1): firstcode[l] = (firstcode[l + 1] + numl[l + 1]) / 2 ## codeword and symbol symbol = [None] * (maxlength + 1) for l in xrange(maxlength + 1): if numl[l] > 0: symbol[l] = [None] * numl[l] nextcode = firstcode[:] codeword = [None] * n for i in xrange(n): l = codelen[i] codeword[i] = nextcode[l] symbol[l][nextcode[l] - firstcode[l]] = i nextcode[l] += 1 self.codelen = codelen self.codeword = codeword self.firstcode = firstcode self.symbol = symbol def get_encoder(self): return Encoder(self) def get_decoder(self): return Decoder(self) class Encoder(object): def __init__(self, chc): self.codelen = chc.codelen self.codeword = chc.codeword def encode(self, i): s = "" v = self.codeword[i] ## decimal to binary strings for x in xrange(self.codelen[i]): s = str(v % 2) + s v = v / 2 return s class Decoder(object): def __init__(self, chc): self.firstcode = chc.firstcode self.symbol = chc.symbol def _nextinputbit(self, input): if len(input) > 0: return int(input.pop(0)) else: return def decode(self, input): v = self._nextinputbit(input) if v is None: return l = 1 while v < self.firstcode[l]: v = 2 * v + self._nextinputbit(input) l += 1 return self.symbol[l][v - self.firstcode[l]] # a b c d e f count = [ 10, 11, 12, 13, 22, 23 ] # count = [ 50, 11, 10, 13, 22, 23 ] chc = CHC(count) # print chc.codelen # print chc.codeword enc = chc.get_encoder() for i in xrange(len(count)): print "%d: %s" % (i, enc.encode(i)) dec = chc.get_decoder() code = list("0111011010001") ## 34521 while True: i = dec.decode(code) if i is None: break print i,
あんまし意図がよくわかってないのだけどdecodeしてるcodeってpopで破壊的なコードになってるけどいいのかな?
inputもDecoderの__init__に渡してyieldで返すとかの方がいいんじゃないかなあと思う。