#include "StdAfx.h" #include "Lzari.h" ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// CLzariEx::CLzariEx(void) :m_nInCur(0) ,m_pSrcData(NULL) ,m_textsize(0) ,m_codesize(0) ,m_printcount(0) ,m_low(0) ,m_high(Q4) ,m_value(0) ,m_shifts(0) { m_match_length = 0; m_match_position = 0; m_nDestLength = 0; m_nSrcLength = 0; m_buffer_putbit = 0; m_mask_putbit = 128; m_buffer_getbit = 0; m_mask_getbit = 0; memset(m_dad, 0, N+1); memset(m_lson, 0, N+1); memset(m_rson, 0, N+257); memset(m_char_to_sym, 0, N); memset(m_sym_to_char, 0, N_CHAR+1); memset(m_sym_freq, 0, N_CHAR+1); memset(m_sym_cum, 0, N_CHAR+1); memset(m_position_cum, 0, N+1); memset(m_text_buf, 0, N+F-1); memset(m_text_buf, 0, N+F-1); } CLzariEx::~CLzariEx(void) { m_vtDestData.clear(); } void CLzariEx::Release() { m_nInCur = 0; m_pSrcData = NULL; m_textsize = 0; m_codesize = 0; m_printcount = 0; m_low = 0; m_high = Q4; m_value = 0; m_shifts = 0; m_match_length = 0; m_match_position = 0; m_nDestLength = 0; m_nSrcLength = 0; m_buffer_putbit = 0; m_mask_putbit = 128; m_buffer_getbit = 0; m_mask_getbit = 0; memset(m_dad, 0, N+1); memset(m_lson, 0, N+1); memset(m_rson, 0, N+257); memset(m_char_to_sym, 0, N); memset(m_sym_to_char, 0, N_CHAR+1); memset(m_sym_freq, 0, N_CHAR+1); memset(m_sym_cum, 0, N_CHAR+1); memset(m_position_cum, 0, N+1); memset(m_text_buf, 0, N+F-1); memset(m_text_buf, 0, N+F-1); m_vtDestData.clear(); } void CLzariEx::Error(char *message) { printf("\n%s\n", message); exit(EXIT_FAILURE); } void CLzariEx::PutBit(int bit) { if (bit) m_buffer_putbit |= m_mask_putbit; if ((m_mask_putbit >>= 1) == 0) { m_vtDestData.push_back(m_buffer_putbit); m_buffer_putbit = 0; m_mask_putbit = 128; m_codesize++; } } void CLzariEx::FlushBitBuffer() { for ( int i = 0; i < 7; i++ ) PutBit(0); } int CLzariEx::GetBit(void) { if ((m_mask_getbit >>= 1) == 0) { if ( m_nInCur < m_nSrcLength ) m_buffer_getbit = m_pSrcData[m_nInCur++]; m_mask_getbit = 128; } return ((m_buffer_getbit & m_mask_getbit) != 0); } void CLzariEx::InitTree(void) /* Initialize trees */ { int i; /* For i = 0 to N - 1, m_rson[i] and m_lson[i] will be the right and left children of node i. These nodes need not be initialized. Also, m_dad[i] is the parent of node i. These are initialized to NIL (= N), which stands for 'not used.' For i = 0 to 255, m_rson[N + i + 1] is the root of the tree for strings that begin with character i. These are initialized to NIL. Note there are 256 trees. */ for (i = N + 1; i <= N + 256; i++) m_rson[i] = NIL; /* root */ for (i = 0; i < N; i++) m_dad[i] = NIL; /* node */ } void CLzariEx::InsertNode(int r) /* Inserts string of length F, m_text_buf[r..r+F-1], into one of the trees (m_text_buf[r]'th tree) and returns the longest-match position and length via the global variables m_match_position and m_match_length. If m_match_length = F, then removes the old node in favor of the new one, because the old one will be deleted sooner. Note r plays double role, as tree node and position in buffer. */ { int i, p, cmp, temp; unsigned char *key; cmp = 1; key = &m_text_buf[r]; p = N + 1 + key[0]; m_rson[r] = m_lson[r] = NIL; m_match_length = 0; for ( ; ; ) { if (cmp >= 0) { if (m_rson[p] != NIL) p = m_rson[p]; else { m_rson[p] = r; m_dad[r] = p; return; } } else { if (m_lson[p] != NIL) p = m_lson[p]; else { m_lson[p] = r; m_dad[r] = p; return; } } for (i = 1; i < F; i++) if ((cmp = key[i] - m_text_buf[p + i]) != 0) break; if (i > THRESHOLD) { if (i > m_match_length) { m_match_position = (r - p) & (N - 1); if ((m_match_length = i) >= F) break; } else if (i == m_match_length) { if ((temp = (r - p) & (N - 1)) < m_match_position) m_match_position = temp; } } } m_dad[r] = m_dad[p]; m_lson[r] = m_lson[p]; m_rson[r] = m_rson[p]; m_dad[m_lson[p]] = r; m_dad[m_rson[p]] = r; if (m_rson[m_dad[p]] == p) m_rson[m_dad[p]] = r; else m_lson[m_dad[p]] = r; m_dad[p] = NIL; /* remove p */ } void CLzariEx::DeleteNode(int p) /* Delete node p from tree */ { int q; if (m_dad[p] == NIL) return; /* not in tree */ if (m_rson[p] == NIL) q = m_lson[p]; else if (m_lson[p] == NIL) q = m_rson[p]; else { q = m_lson[p]; if (m_rson[q] != NIL) { do { q = m_rson[q]; } while (m_rson[q] != NIL); m_rson[m_dad[q]] = m_lson[q]; m_dad[m_lson[q]] = m_dad[q]; m_lson[q] = m_lson[p]; m_dad[m_lson[p]] = q; } m_rson[q] = m_rson[p]; m_dad[m_rson[p]] = q; } m_dad[q] = m_dad[p]; if (m_rson[m_dad[p]] == p) m_rson[m_dad[p]] = q; else m_lson[m_dad[p]] = q; m_dad[p] = NIL; } void CLzariEx::StartModel(void) /* Initialize model */ { int ch, sym, i; m_sym_cum[N_CHAR] = 0; for (sym = N_CHAR; sym >= 1; sym--) { ch = sym - 1; m_char_to_sym[ch] = sym; m_sym_to_char[sym] = ch; m_sym_freq[sym] = 1; m_sym_cum[sym - 1] = m_sym_cum[sym] + m_sym_freq[sym]; } m_sym_freq[0] = 0; /* sentinel (!= m_sym_freq[1]) */ m_position_cum[N] = 0; for (i = N; i >= 1; i--) m_position_cum[i - 1] = m_position_cum[i] + 10000 / (i + 200); /* empirical distribution function (quite tentative) */ /* Please devise a better mechanism! */ } void CLzariEx::UpdateModel(int sym) { int i, c, ch_i, ch_sym; if (m_sym_cum[0] >= MAX_CUM) { c = 0; for (i = N_CHAR; i > 0; i--) { m_sym_cum[i] = c; c += (m_sym_freq[i] = (m_sym_freq[i] + 1) >> 1); } m_sym_cum[0] = c; } for (i = sym; m_sym_freq[i] == m_sym_freq[i - 1]; i--) ; if (i < sym) { ch_i = m_sym_to_char[i]; ch_sym = m_sym_to_char[sym]; m_sym_to_char[i] = ch_sym; m_sym_to_char[sym] = ch_i; m_char_to_sym[ch_i] = sym; m_char_to_sym[ch_sym] = i; } m_sym_freq[i]++; while (--i >= 0) m_sym_cum[i]++; } void CLzariEx::Output(int bit) /* Output 1 bit, followed by its complements */ { PutBit(bit); for ( ; m_shifts > 0; m_shifts--) PutBit(! bit); } void CLzariEx::EncodeChar(int ch) { int sym; unsigned long int range; sym = m_char_to_sym[ch]; range = m_high - m_low; m_high = m_low + (range * m_sym_cum[sym - 1]) / m_sym_cum[0]; m_low += (range * m_sym_cum[sym ]) / m_sym_cum[0]; for ( ; ; ) { if (m_high <= Q2) Output(0); else if (m_low >= Q2) { Output(1); m_low -= Q2; m_high -= Q2; } else if (m_low >= Q1 && m_high <= Q3) { m_shifts++; m_low -= Q1; m_high -= Q1; } else break; m_low += m_low; m_high += m_high; } UpdateModel(sym); } void CLzariEx::EncodePosition(int position) { unsigned long int range; range = m_high - m_low; m_high = m_low + (range * m_position_cum[position ]) / m_position_cum[0]; m_low += (range * m_position_cum[position + 1]) / m_position_cum[0]; for ( ; ; ) { if (m_high <= Q2) Output(0); else if (m_low >= Q2) { Output(1); m_low -= Q2; m_high -= Q2; } else if (m_low >= Q1 && m_high <= Q3) { m_shifts++; m_low -= Q1; m_high -= Q1; } else break; m_low += m_low; m_high += m_high; } } void CLzariEx::EncodeEnd(void) { m_shifts++; if (m_low < Q1) Output(0); else Output(1); FlushBitBuffer(); /* flush bits remaining in buffer */ } int CLzariEx::BinarySearchSym(unsigned int x) /* 1 if x >= m_sym_cum[1], N_CHAR if m_sym_cum[N_CHAR] > x, i such that m_sym_cum[i - 1] > x >= m_sym_cum[i] otherwise */ { int i, j, k; i = 1; j = N_CHAR; while (i < j) { k = (i + j) / 2; if (m_sym_cum[k] > x) i = k + 1; else j = k; } return i; } int CLzariEx::BinarySearchPos(unsigned int x) /* 0 if x >= m_position_cum[1], N - 1 if m_position_cum[N] > x, i such that m_position_cum[i] > x >= m_position_cum[i + 1] otherwise */ { int i, j, k; i = 1; j = N; while (i < j) { k = (i + j) / 2; if (m_position_cum[k] > x) i = k + 1; else j = k; } return i - 1; } void CLzariEx::StartDecode(void) { int i; for (i = 0; i < M + 2; i++) m_value = 2 * m_value + GetBit(); } int CLzariEx::DecodeChar(void) { int sym, ch; unsigned long int range; range = m_high - m_low; sym = BinarySearchSym((unsigned int)(((m_value - m_low + 1) * m_sym_cum[0] - 1) / range)); m_high = m_low + (range * m_sym_cum[sym - 1]) / m_sym_cum[0]; m_low += (range * m_sym_cum[sym ]) / m_sym_cum[0]; for ( ; ; ) { if (m_low >= Q2) { m_value -= Q2; m_low -= Q2; m_high -= Q2; } else if (m_low >= Q1 && m_high <= Q3) { m_value -= Q1; m_low -= Q1; m_high -= Q1; } else if (m_high > Q2) break; m_low += m_low; m_high += m_high; m_value = 2 * m_value + GetBit(); } ch = m_sym_to_char[sym]; UpdateModel(sym); return ch; } int CLzariEx::DecodePosition(void) { int position; unsigned long int range; range = m_high - m_low; position = BinarySearchPos((unsigned int)(((m_value - m_low + 1) * m_position_cum[0] - 1) / range)); m_high = m_low + (range * m_position_cum[position ]) / m_position_cum[0]; m_low += (range * m_position_cum[position + 1]) / m_position_cum[0]; for ( ; ; ) { if (m_low >= Q2) { m_value -= Q2; m_low -= Q2; m_high -= Q2; } else if (m_low >= Q1 && m_high <= Q3) { m_value -= Q1; m_low -= Q1; m_high -= Q1; } else if (m_high > Q2) break; m_low += m_low; m_high += m_high; m_value = 2 * m_value + GetBit(); } return position; } /********** Encode and Decode **********/ void CLzariEx::Encode(void) { int i, c, len, r, s, last_match_length; m_textsize = m_nSrcLength; m_vtDestData.resize(sizeof(m_textsize)); memcpy(&m_vtDestData[0], &m_textsize, sizeof(m_textsize)); m_codesize += sizeof(m_textsize); if (m_textsize == 0) return; m_nInCur = 0; m_textsize = 0; StartModel(); InitTree(); s = 0; r = N - F; for (i = s; i < r; i++) m_text_buf[i] = ' '; for (len = 0; len < F && ( m_nInCur < m_nSrcLength); len++) { c = m_pSrcData[m_nInCur++]; m_text_buf[r + len] = c; } m_textsize = len; for (i = 1; i <= F; i++) InsertNode(r - i); InsertNode(r); do { if (m_match_length > len) m_match_length = len; if (m_match_length <= THRESHOLD) { m_match_length = 1; EncodeChar(m_text_buf[r]); } else { EncodeChar(255 - THRESHOLD + m_match_length); EncodePosition(m_match_position - 1); } last_match_length = m_match_length; for (i = 0; i < last_match_length && (m_nInCur < m_nSrcLength) ; i++) { c = m_pSrcData[m_nInCur++]; DeleteNode(s); m_text_buf[s] = c; if (s < F - 1) m_text_buf[s + N] = c; s = (s + 1) & (N - 1); r = (r + 1) & (N - 1); InsertNode(r); } if ((m_textsize += i) > m_printcount) { printf("%12ld\r", m_textsize); m_printcount += 1024; } while (i++ < last_match_length) { DeleteNode(s); s = (s + 1) & (N - 1); r = (r + 1) & (N - 1); if (--len) InsertNode(r); } } while (len > 0); EncodeEnd(); printf("In : %lu bytes\n", m_textsize); printf("Out: %lu bytes\n", m_codesize); printf("Out/In: %.3f\n", (double)m_codesize / m_textsize); } void CLzariEx::Decode(void) { int i, j, k, r, c; unsigned long int count; if ( m_nSrcLength < sizeof(m_textsize) ) Error("Read Error"); /* read size of text */ memcpy(&m_textsize, m_pSrcData + m_nInCur, sizeof(m_textsize)); m_nDestLength = m_textsize; m_nInCur += sizeof(m_textsize); if (m_textsize == 0) return; StartDecode(); StartModel(); for (i = 0; i < N - F; i++) m_text_buf[i] = ' '; r = N - F; for (count = 0; count < m_textsize; ) { c = DecodeChar(); if (c < 256) { m_vtDestData.push_back(c); m_text_buf[r++] = c; r &= (N - 1); count++; } else { i = (r - DecodePosition() - 1) & (N - 1); j = c - 255 + THRESHOLD; for (k = 0; k < j; k++) { c = m_text_buf[(i + k) & (N - 1)]; m_vtDestData.push_back(c); m_text_buf[r++] = c; r &= (N - 1); count++; } } if (count > m_printcount) { printf("%12lu\r", count); m_printcount += 1024; } } printf("%12lu\n", count); } void CLzariEx::Compress(const BYTE *pInBuffer,int nInLength,const BYTE *&pOutBuffer ,int &nOutLength) { m_pSrcData = pInBuffer; m_nSrcLength = nInLength; m_nInCur = 0; Encode(); pOutBuffer = &m_vtDestData[0]; nOutLength = m_vtDestData.size(); } void CLzariEx::UnCompress(const BYTE *pInBuffer,int nInLength,const BYTE *&pOutBuffer ,int &nOutLength) { m_pSrcData = pInBuffer; m_nSrcLength = nInLength; m_nInCur = 0; TRACE("%d,%d,%d\t\n", m_nSrcLength,m_nInCur,m_textsize); Decode(); pOutBuffer = &m_vtDestData[0]; nOutLength = m_vtDestData.size(); //m_vtDestData.push_back(0); TRACE("%d,%d,%d\t\n", m_nSrcLength,m_nInCur,m_textsize); }