123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678 |
- #include "StdAfx.h"
- #include "Lzari.h"
- //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- CLzariEx::CLzariEx(void)
- :m_nInCur(0)
- ,m_pSrcData(NULL)
- ,m_textsize(0)
- ,m_codesize(0)
- ,m_printcount(0)
- ,m_low(0)
- ,m_high(Q4)
- ,m_value(0)
- ,m_shifts(0)
- {
- m_match_length = 0;
- m_match_position = 0;
- m_nDestLength = 0;
- m_nSrcLength = 0;
- m_buffer_putbit = 0;
- m_mask_putbit = 128;
- m_buffer_getbit = 0;
- m_mask_getbit = 0;
- memset(m_dad, 0, N+1);
- memset(m_lson, 0, N+1);
- memset(m_rson, 0, N+257);
- memset(m_char_to_sym, 0, N);
- memset(m_sym_to_char, 0, N_CHAR+1);
- memset(m_sym_freq, 0, N_CHAR+1);
- memset(m_sym_cum, 0, N_CHAR+1);
- memset(m_position_cum, 0, N+1);
- memset(m_text_buf, 0, N+F-1);
- memset(m_text_buf, 0, N+F-1);
- }
- CLzariEx::~CLzariEx(void)
- {
- m_vtDestData.clear();
- }
- void CLzariEx::Release()
- {
- m_nInCur = 0;
- m_pSrcData = NULL;
- m_textsize = 0;
- m_codesize = 0;
- m_printcount = 0;
- m_low = 0;
- m_high = Q4;
- m_value = 0;
- m_shifts = 0;
- m_match_length = 0;
- m_match_position = 0;
- m_nDestLength = 0;
- m_nSrcLength = 0;
- m_buffer_putbit = 0;
- m_mask_putbit = 128;
- m_buffer_getbit = 0;
- m_mask_getbit = 0;
- memset(m_dad, 0, N+1);
- memset(m_lson, 0, N+1);
- memset(m_rson, 0, N+257);
- memset(m_char_to_sym, 0, N);
- memset(m_sym_to_char, 0, N_CHAR+1);
- memset(m_sym_freq, 0, N_CHAR+1);
- memset(m_sym_cum, 0, N_CHAR+1);
- memset(m_position_cum, 0, N+1);
- memset(m_text_buf, 0, N+F-1);
- memset(m_text_buf, 0, N+F-1);
- m_vtDestData.clear();
- }
- void CLzariEx::Error(char *message)
- {
- printf("\n%s\n", message);
- exit(EXIT_FAILURE);
- }
- void CLzariEx::PutBit(int bit)
- {
- if (bit)
- m_buffer_putbit |= m_mask_putbit;
- if ((m_mask_putbit >>= 1) == 0)
- {
- m_vtDestData.push_back(m_buffer_putbit);
- m_buffer_putbit = 0;
- m_mask_putbit = 128;
- m_codesize++;
- }
- }
- void CLzariEx::FlushBitBuffer()
- {
- for ( int i = 0; i < 7; i++ )
- PutBit(0);
- }
- int CLzariEx::GetBit(void)
- {
- if ((m_mask_getbit >>= 1) == 0)
- {
- if ( m_nInCur < m_nSrcLength )
- m_buffer_getbit = m_pSrcData[m_nInCur++];
- m_mask_getbit = 128;
- }
- return ((m_buffer_getbit & m_mask_getbit) != 0);
- }
- void CLzariEx::InitTree(void) /* Initialize trees */
- {
- int i;
- /* For i = 0 to N - 1, m_rson[i] and m_lson[i] will be the right and
- left children of node i. These nodes need not be initialized.
- Also, m_dad[i] is the parent of node i. These are initialized to
- NIL (= N), which stands for 'not used.'
- For i = 0 to 255, m_rson[N + i + 1] is the root of the tree
- for strings that begin with character i. These are initialized
- to NIL. Note there are 256 trees. */
- for (i = N + 1; i <= N + 256; i++)
- m_rson[i] = NIL; /* root */
- for (i = 0; i < N; i++)
- m_dad[i] = NIL; /* node */
- }
- void CLzariEx::InsertNode(int r)
- /* Inserts string of length F, m_text_buf[r..r+F-1], into one of the
- trees (m_text_buf[r]'th tree) and returns the longest-match position
- and length via the global variables m_match_position and m_match_length.
- If m_match_length = F, then removes the old node in favor of the new
- one, because the old one will be deleted sooner.
- Note r plays double role, as tree node and position in buffer. */
- {
- int i, p, cmp, temp;
- unsigned char *key;
- cmp = 1;
- key = &m_text_buf[r];
- p = N + 1 + key[0];
- m_rson[r] = m_lson[r] = NIL;
- m_match_length = 0;
- for ( ; ; )
- {
- if (cmp >= 0)
- {
- if (m_rson[p] != NIL)
- p = m_rson[p];
- else
- {
- m_rson[p] = r;
- m_dad[r] = p;
- return;
- }
- }
- else
- {
- if (m_lson[p] != NIL)
- p = m_lson[p];
- else
- {
- m_lson[p] = r;
- m_dad[r] = p;
- return;
- }
- }
- for (i = 1; i < F; i++)
- if ((cmp = key[i] - m_text_buf[p + i]) != 0)
- break;
- if (i > THRESHOLD)
- {
- if (i > m_match_length)
- {
- m_match_position = (r - p) & (N - 1);
- if ((m_match_length = i) >= F)
- break;
- }
- else if (i == m_match_length)
- {
- if ((temp = (r - p) & (N - 1)) < m_match_position)
- m_match_position = temp;
- }
- }
- }
- m_dad[r] = m_dad[p];
- m_lson[r] = m_lson[p];
- m_rson[r] = m_rson[p];
- m_dad[m_lson[p]] = r;
- m_dad[m_rson[p]] = r;
- if (m_rson[m_dad[p]] == p)
- m_rson[m_dad[p]] = r;
- else
- m_lson[m_dad[p]] = r;
- m_dad[p] = NIL; /* remove p */
- }
- void CLzariEx::DeleteNode(int p) /* Delete node p from tree */
- {
- int q;
- if (m_dad[p] == NIL)
- return; /* not in tree */
- if (m_rson[p] == NIL)
- q = m_lson[p];
- else if (m_lson[p] == NIL)
- q = m_rson[p];
- else
- {
- q = m_lson[p];
- if (m_rson[q] != NIL)
- {
- do
- {
- q = m_rson[q];
- } while (m_rson[q] != NIL);
- m_rson[m_dad[q]] = m_lson[q];
- m_dad[m_lson[q]] = m_dad[q];
- m_lson[q] = m_lson[p];
- m_dad[m_lson[p]] = q;
- }
- m_rson[q] = m_rson[p];
- m_dad[m_rson[p]] = q;
- }
- m_dad[q] = m_dad[p];
- if (m_rson[m_dad[p]] == p)
- m_rson[m_dad[p]] = q;
- else
- m_lson[m_dad[p]] = q;
- m_dad[p] = NIL;
- }
- void CLzariEx::StartModel(void) /* Initialize model */
- {
- int ch, sym, i;
- m_sym_cum[N_CHAR] = 0;
- for (sym = N_CHAR; sym >= 1; sym--)
- {
- ch = sym - 1;
- m_char_to_sym[ch] = sym;
- m_sym_to_char[sym] = ch;
- m_sym_freq[sym] = 1;
- m_sym_cum[sym - 1] = m_sym_cum[sym] + m_sym_freq[sym];
- }
- m_sym_freq[0] = 0; /* sentinel (!= m_sym_freq[1]) */
- m_position_cum[N] = 0;
- for (i = N; i >= 1; i--)
- m_position_cum[i - 1] = m_position_cum[i] + 10000 / (i + 200);
- /* empirical distribution function (quite tentative) */
- /* Please devise a better mechanism! */
- }
- void CLzariEx::UpdateModel(int sym)
- {
- int i, c, ch_i, ch_sym;
- if (m_sym_cum[0] >= MAX_CUM)
- {
- c = 0;
- for (i = N_CHAR; i > 0; i--)
- {
- m_sym_cum[i] = c;
- c += (m_sym_freq[i] = (m_sym_freq[i] + 1) >> 1);
- }
- m_sym_cum[0] = c;
- }
- for (i = sym; m_sym_freq[i] == m_sym_freq[i - 1]; i--)
- ;
- if (i < sym)
- {
- ch_i = m_sym_to_char[i];
- ch_sym = m_sym_to_char[sym];
- m_sym_to_char[i] = ch_sym;
- m_sym_to_char[sym] = ch_i;
- m_char_to_sym[ch_i] = sym;
- m_char_to_sym[ch_sym] = i;
- }
- m_sym_freq[i]++;
- while (--i >= 0)
- m_sym_cum[i]++;
- }
- void CLzariEx::Output(int bit) /* Output 1 bit, followed by its complements */
- {
- PutBit(bit);
- for ( ; m_shifts > 0; m_shifts--)
- PutBit(! bit);
- }
- void CLzariEx::EncodeChar(int ch)
- {
- int sym;
- unsigned long int range;
- sym = m_char_to_sym[ch];
- range = m_high - m_low;
- m_high = m_low + (range * m_sym_cum[sym - 1]) / m_sym_cum[0];
- m_low += (range * m_sym_cum[sym ]) / m_sym_cum[0];
- for ( ; ; )
- {
- if (m_high <= Q2)
- Output(0);
- else if (m_low >= Q2)
- {
- Output(1);
- m_low -= Q2;
- m_high -= Q2;
- }
- else if (m_low >= Q1 && m_high <= Q3)
- {
- m_shifts++;
- m_low -= Q1;
- m_high -= Q1;
- }
- else
- break;
- m_low += m_low;
- m_high += m_high;
- }
- UpdateModel(sym);
- }
- void CLzariEx::EncodePosition(int position)
- {
- unsigned long int range;
- range = m_high - m_low;
- m_high = m_low + (range * m_position_cum[position ]) / m_position_cum[0];
- m_low += (range * m_position_cum[position + 1]) / m_position_cum[0];
- for ( ; ; )
- {
- if (m_high <= Q2)
- Output(0);
- else if (m_low >= Q2)
- {
- Output(1);
- m_low -= Q2;
- m_high -= Q2;
- }
- else if (m_low >= Q1 && m_high <= Q3)
- {
- m_shifts++;
- m_low -= Q1;
- m_high -= Q1;
- } else break;
- m_low += m_low;
- m_high += m_high;
- }
- }
- void CLzariEx::EncodeEnd(void)
- {
- m_shifts++;
- if (m_low < Q1)
- Output(0);
- else
- Output(1);
- FlushBitBuffer(); /* flush bits remaining in buffer */
- }
- int CLzariEx::BinarySearchSym(unsigned int x)
- /* 1 if x >= m_sym_cum[1],
- N_CHAR if m_sym_cum[N_CHAR] > x,
- i such that m_sym_cum[i - 1] > x >= m_sym_cum[i] otherwise */
- {
- int i, j, k;
- i = 1; j = N_CHAR;
- while (i < j)
- {
- k = (i + j) / 2;
- if (m_sym_cum[k] > x)
- i = k + 1;
- else
- j = k;
- }
- return i;
- }
- int CLzariEx::BinarySearchPos(unsigned int x)
- /* 0 if x >= m_position_cum[1],
- N - 1 if m_position_cum[N] > x,
- i such that m_position_cum[i] > x >= m_position_cum[i + 1] otherwise */
- {
- int i, j, k;
- i = 1; j = N;
- while (i < j)
- {
- k = (i + j) / 2;
- if (m_position_cum[k] > x)
- i = k + 1;
- else
- j = k;
- }
- return i - 1;
- }
- void CLzariEx::StartDecode(void)
- {
- int i;
- for (i = 0; i < M + 2; i++)
- m_value = 2 * m_value + GetBit();
- }
- int CLzariEx::DecodeChar(void)
- {
- int sym, ch;
- unsigned long int range;
- range = m_high - m_low;
- sym = BinarySearchSym((unsigned int)(((m_value - m_low + 1) * m_sym_cum[0] - 1) / range));
- m_high = m_low + (range * m_sym_cum[sym - 1]) / m_sym_cum[0];
- m_low += (range * m_sym_cum[sym ]) / m_sym_cum[0];
- for ( ; ; )
- {
- if (m_low >= Q2)
- {
- m_value -= Q2;
- m_low -= Q2;
- m_high -= Q2;
- }
- else if (m_low >= Q1 && m_high <= Q3)
- {
- m_value -= Q1;
- m_low -= Q1;
- m_high -= Q1;
- }
- else if (m_high > Q2)
- break;
- m_low += m_low;
- m_high += m_high;
- m_value = 2 * m_value + GetBit();
- }
- ch = m_sym_to_char[sym];
- UpdateModel(sym);
- return ch;
- }
- int CLzariEx::DecodePosition(void)
- {
- int position;
- unsigned long int range;
- range = m_high - m_low;
- position = BinarySearchPos((unsigned int)(((m_value - m_low + 1) * m_position_cum[0] - 1) / range));
- m_high = m_low + (range * m_position_cum[position ]) / m_position_cum[0];
- m_low += (range * m_position_cum[position + 1]) / m_position_cum[0];
- for ( ; ; )
- {
- if (m_low >= Q2) {
- m_value -= Q2;
- m_low -= Q2;
- m_high -= Q2;
- }
- else if (m_low >= Q1 && m_high <= Q3)
- {
- m_value -= Q1;
- m_low -= Q1;
- m_high -= Q1;
- }
- else if (m_high > Q2)
- break;
- m_low += m_low;
- m_high += m_high;
- m_value = 2 * m_value + GetBit();
- }
- return position;
- }
- /********** Encode and Decode **********/
- void CLzariEx::Encode(void)
- {
- int i, c, len, r, s, last_match_length;
- m_textsize = m_nSrcLength;
- m_vtDestData.resize(sizeof(m_textsize));
- memcpy(&m_vtDestData[0], &m_textsize, sizeof(m_textsize));
- m_codesize += sizeof(m_textsize);
- if (m_textsize == 0)
- return;
- m_nInCur = 0;
- m_textsize = 0;
- StartModel();
- InitTree();
- s = 0; r = N - F;
- for (i = s; i < r; i++)
- m_text_buf[i] = ' ';
- for (len = 0; len < F && ( m_nInCur < m_nSrcLength); len++)
- {
- c = m_pSrcData[m_nInCur++];
- m_text_buf[r + len] = c;
- }
- m_textsize = len;
- for (i = 1; i <= F; i++)
- InsertNode(r - i);
- InsertNode(r);
- do
- {
- if (m_match_length > len) m_match_length = len;
- if (m_match_length <= THRESHOLD)
- {
- m_match_length = 1;
- EncodeChar(m_text_buf[r]);
- }
- else
- {
- EncodeChar(255 - THRESHOLD + m_match_length);
- EncodePosition(m_match_position - 1);
- }
- last_match_length = m_match_length;
- for (i = 0; i < last_match_length && (m_nInCur < m_nSrcLength) ; i++)
- {
- c = m_pSrcData[m_nInCur++];
- DeleteNode(s);
- m_text_buf[s] = c;
- if (s < F - 1) m_text_buf[s + N] = c;
- s = (s + 1) & (N - 1);
- r = (r + 1) & (N - 1);
- InsertNode(r);
- }
- if ((m_textsize += i) > m_printcount)
- {
- printf("%12ld\r", m_textsize);
- m_printcount += 1024;
- }
- while (i++ < last_match_length)
- {
- DeleteNode(s);
- s = (s + 1) & (N - 1);
- r = (r + 1) & (N - 1);
- if (--len) InsertNode(r);
- }
- } while (len > 0);
- EncodeEnd();
- printf("In : %lu bytes\n", m_textsize);
- printf("Out: %lu bytes\n", m_codesize);
- printf("Out/In: %.3f\n", (double)m_codesize / m_textsize);
- }
- void CLzariEx::Decode(void)
- {
- int i, j, k, r, c;
- unsigned long int count;
- if ( m_nSrcLength < sizeof(m_textsize) )
- Error("Read Error"); /* read size of text */
- memcpy(&m_textsize, m_pSrcData + m_nInCur, sizeof(m_textsize));
- m_nDestLength = m_textsize;
- m_nInCur += sizeof(m_textsize);
- if (m_textsize == 0)
- return;
- StartDecode();
- StartModel();
- for (i = 0; i < N - F; i++)
- m_text_buf[i] = ' ';
- r = N - F;
- for (count = 0; count < m_textsize; )
- {
- c = DecodeChar();
- if (c < 256)
- {
- m_vtDestData.push_back(c);
- m_text_buf[r++] = c;
- r &= (N - 1);
- count++;
- }
- else
- {
- i = (r - DecodePosition() - 1) & (N - 1);
- j = c - 255 + THRESHOLD;
- for (k = 0; k < j; k++)
- {
- c = m_text_buf[(i + k) & (N - 1)];
- m_vtDestData.push_back(c);
- m_text_buf[r++] = c;
- r &= (N - 1);
- count++;
- }
- }
- if (count > m_printcount)
- {
- printf("%12lu\r", count);
- m_printcount += 1024;
- }
- }
- printf("%12lu\n", count);
- }
- void CLzariEx::Compress(const BYTE *pInBuffer,int nInLength,const BYTE *&pOutBuffer ,int &nOutLength)
- {
- m_pSrcData = pInBuffer;
- m_nSrcLength = nInLength;
- m_nInCur = 0;
- Encode();
- pOutBuffer = &m_vtDestData[0];
- nOutLength = m_vtDestData.size();
- }
- void CLzariEx::UnCompress(const BYTE *pInBuffer,int nInLength,const BYTE *&pOutBuffer ,int &nOutLength)
- {
- m_pSrcData = pInBuffer;
- m_nSrcLength = nInLength;
- m_nInCur = 0;
- TRACE("%d,%d,%d\t\n", m_nSrcLength,m_nInCur,m_textsize);
- Decode();
- pOutBuffer = &m_vtDestData[0];
- nOutLength = m_vtDestData.size();
- //m_vtDestData.push_back(0);
- TRACE("%d,%d,%d\t\n", m_nSrcLength,m_nInCur,m_textsize);
- }
|