| 1 | /* |
|---|
| 2 | PowerDNS Versatile Database Driven Nameserver |
|---|
| 3 | Copyright (C) 2002 PowerDNS.COM BV |
|---|
| 4 | |
|---|
| 5 | This program is free software; you can redistribute it and/or modify |
|---|
| 6 | it under the terms of the GNU General Public License as published by |
|---|
| 7 | the Free Software Foundation; either version 2 of the License, or |
|---|
| 8 | (at your option) any later version. |
|---|
| 9 | |
|---|
| 10 | This program is distributed in the hope that it will be useful, |
|---|
| 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
|---|
| 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|---|
| 13 | GNU General Public License for more details. |
|---|
| 14 | |
|---|
| 15 | You should have received a copy of the GNU General Public License |
|---|
| 16 | along with this program; if not, write to the Free Software |
|---|
| 17 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
|---|
| 18 | */ |
|---|
| 19 | #include <string> |
|---|
| 20 | #include "huffman.hh" |
|---|
| 21 | #include <bitset> |
|---|
| 22 | #include <map> |
|---|
| 23 | #include <sstream> |
|---|
| 24 | #include <stdlib.h> |
|---|
| 25 | #include <unistd.h> |
|---|
| 26 | #include <iostream> |
|---|
| 27 | #include <algorithm> |
|---|
| 28 | #include <utility> |
|---|
| 29 | |
|---|
| 30 | void HuffmanCodec::set(char c,const string &code) |
|---|
| 31 | { |
|---|
| 32 | d_dict[c]=code; |
|---|
| 33 | } |
|---|
| 34 | |
|---|
| 35 | HuffmanCodec::HuffmanCodec() |
|---|
| 36 | { |
|---|
| 37 | d_dict.clear(); |
|---|
| 38 | set('6',"0000"); |
|---|
| 39 | set('5',"0001"); |
|---|
| 40 | set(0,"0010"); |
|---|
| 41 | set('3',"0011"); |
|---|
| 42 | set('4',"0100"); |
|---|
| 43 | set('s',"0101"); |
|---|
| 44 | set('n',"011"); |
|---|
| 45 | set('c',"100000"); |
|---|
| 46 | set('u',"100001"); |
|---|
| 47 | set('-',"1000100"); |
|---|
| 48 | set('1',"1000101"); |
|---|
| 49 | set('f',"1000110"); |
|---|
| 50 | set('j',"10001110"); |
|---|
| 51 | set('9',"1000111100"); |
|---|
| 52 | set('*',"100011110100"); |
|---|
| 53 | set('q',"100011110101"); |
|---|
| 54 | set('7',"10001111011"); |
|---|
| 55 | set('8',"100011111"); |
|---|
| 56 | set('o',"10010"); |
|---|
| 57 | set('t',"10011"); |
|---|
| 58 | set('e',"1010"); |
|---|
| 59 | set('a',"10110"); |
|---|
| 60 | set('r',"10111"); |
|---|
| 61 | set('d',"110000"); |
|---|
| 62 | set('2',"1100010"); |
|---|
| 63 | set('k',"1100011"); |
|---|
| 64 | set('.',"110010"); |
|---|
| 65 | set('v',"1100110"); |
|---|
| 66 | set('w',"1100111"); |
|---|
| 67 | set('i',"1101"); |
|---|
| 68 | set('l',"111000"); |
|---|
| 69 | set('p',"1110010"); |
|---|
| 70 | set('b',"1110011"); |
|---|
| 71 | set('z',"111010000"); |
|---|
| 72 | set('y',"111010001"); |
|---|
| 73 | set('x',"11101001"); |
|---|
| 74 | set('h',"1110101"); |
|---|
| 75 | set('m',"1110110"); |
|---|
| 76 | set('g',"1110111"); |
|---|
| 77 | set('0',"1111"); |
|---|
| 78 | |
|---|
| 79 | d_min=10000; |
|---|
| 80 | d_max=0; |
|---|
| 81 | d_rdict.resize(128); |
|---|
| 82 | for(map<char,string>::const_iterator i=d_dict.begin();i!=d_dict.end();++i) { |
|---|
| 83 | d_min=min(d_min,i->second.length()); |
|---|
| 84 | d_max=max(d_max,i->second.length()); |
|---|
| 85 | |
|---|
| 86 | (d_rdict[i->second.length()])[i->second]=i->first; |
|---|
| 87 | } |
|---|
| 88 | d_last_compressed=d_last_out=""; |
|---|
| 89 | d_passthrough=false; |
|---|
| 90 | } |
|---|
| 91 | |
|---|
| 92 | void HuffmanCodec::passthrough(bool shoulddo) |
|---|
| 93 | { |
|---|
| 94 | d_passthrough=shoulddo; |
|---|
| 95 | } |
|---|
| 96 | |
|---|
| 97 | |
|---|
| 98 | // Bitify input: 1001101110101001000101 |
|---|
| 99 | //Decode got offered: '1001101110101001' |
|---|
| 100 | |
|---|
| 101 | |
|---|
| 102 | void HuffmanCodec::decode(const string &compressed, string &out) |
|---|
| 103 | { |
|---|
| 104 | if(d_passthrough) { |
|---|
| 105 | out=compressed; |
|---|
| 106 | return; |
|---|
| 107 | } |
|---|
| 108 | if(compressed==d_last_compressed) { |
|---|
| 109 | out=d_last_out; |
|---|
| 110 | return; |
|---|
| 111 | } |
|---|
| 112 | string full; |
|---|
| 113 | |
|---|
| 114 | out=""; |
|---|
| 115 | unbitify(compressed, full); |
|---|
| 116 | // cout<<"Decode got offered: '"<<full<<"'"<<endl; |
|---|
| 117 | |
|---|
| 118 | unsigned int pos=0; |
|---|
| 119 | size_t cleft=full.length(); |
|---|
| 120 | size_t mlen; |
|---|
| 121 | out.reserve(full.length()/5); |
|---|
| 122 | while(cleft) { |
|---|
| 123 | map<string,char>::const_iterator i; |
|---|
| 124 | |
|---|
| 125 | for(mlen=d_min;mlen<=cleft && mlen<=d_max;++mlen) { |
|---|
| 126 | if(d_rdict[mlen].empty()) |
|---|
| 127 | continue; |
|---|
| 128 | |
|---|
| 129 | i=d_rdict[mlen].find(full.substr(pos,mlen)); |
|---|
| 130 | |
|---|
| 131 | if(i!=d_rdict[mlen].end()) { // match |
|---|
| 132 | if(!i->second) { |
|---|
| 133 | d_last_compressed=compressed; |
|---|
| 134 | d_last_out=out; |
|---|
| 135 | return; |
|---|
| 136 | } |
|---|
| 137 | |
|---|
| 138 | out.append(1,i->second); |
|---|
| 139 | |
|---|
| 140 | pos+=mlen; |
|---|
| 141 | cleft-=mlen; |
|---|
| 142 | break; |
|---|
| 143 | } |
|---|
| 144 | } |
|---|
| 145 | } |
|---|
| 146 | if(cleft) |
|---|
| 147 | throw AhuException("Unable to parse huffman symbol "+full.substr(pos)); |
|---|
| 148 | d_last_compressed=compressed; |
|---|
| 149 | d_last_out=out; |
|---|
| 150 | } |
|---|
| 151 | |
|---|
| 152 | void HuffmanCodec::encode(const string &in, string &out) |
|---|
| 153 | { |
|---|
| 154 | if(d_passthrough) { |
|---|
| 155 | out=in; |
|---|
| 156 | return; |
|---|
| 157 | } |
|---|
| 158 | string full; |
|---|
| 159 | for(string::const_iterator i=in.begin();i!=in.end();++i) { |
|---|
| 160 | map<char,string>::const_iterator j=d_dict.find(tolower(*i)); |
|---|
| 161 | if(j==d_dict.end()) { |
|---|
| 162 | string c; |
|---|
| 163 | char cc=tolower(*i); |
|---|
| 164 | c.append(1,cc); |
|---|
| 165 | throw AhuException("Trying to huffman encode an unknown symbol '"+c+"'"); |
|---|
| 166 | } |
|---|
| 167 | full.append(j->second); |
|---|
| 168 | } |
|---|
| 169 | full.append(d_dict[0]); |
|---|
| 170 | bitify(full,out); |
|---|
| 171 | // cout<<"full: "<<full<<endl; |
|---|
| 172 | } |
|---|
| 173 | |
|---|
| 174 | void HuffmanCodec::bitify(const string &full, string &out) |
|---|
| 175 | { |
|---|
| 176 | unsigned char bitpos=0; |
|---|
| 177 | unsigned char curbyte=0; |
|---|
| 178 | // cout<<"Bitify input: "<<full<<endl; |
|---|
| 179 | for(string::const_iterator i=full.begin();i!=full.end();++i) { |
|---|
| 180 | curbyte|= (*i=='1')<<(7-bitpos); |
|---|
| 181 | if(bitpos++==7) { |
|---|
| 182 | out.append(1,curbyte); |
|---|
| 183 | bitpos=0; |
|---|
| 184 | curbyte=0; |
|---|
| 185 | } |
|---|
| 186 | } |
|---|
| 187 | out.append(1,curbyte); |
|---|
| 188 | } |
|---|
| 189 | |
|---|
| 190 | void HuffmanCodec::unbitify(const string &in, string &full) |
|---|
| 191 | { |
|---|
| 192 | bitset<8> byte; |
|---|
| 193 | ostringstream os; |
|---|
| 194 | full.reserve(in.length()*8); |
|---|
| 195 | for(string::const_iterator i=in.begin();i!=in.end();++i) { |
|---|
| 196 | byte=*i; |
|---|
| 197 | os<<byte; |
|---|
| 198 | } |
|---|
| 199 | full.append(os.str()); |
|---|
| 200 | } |
|---|
| 201 | |
|---|
| 202 | #if 0 |
|---|
| 203 | int main(int argc, char **argv) |
|---|
| 204 | { |
|---|
| 205 | string in(argv[1]); |
|---|
| 206 | string compressed; |
|---|
| 207 | |
|---|
| 208 | try { |
|---|
| 209 | HuffmanCodec hc; |
|---|
| 210 | // hc.initDictionary(dict); |
|---|
| 211 | // cout<<"in: "<<in.length()<<endl; |
|---|
| 212 | hc.encode(in,compressed); |
|---|
| 213 | // cout<<"compressed: "<<compressed.length()<<endl; |
|---|
| 214 | // cout<<"Compressed: '"<<compressed<<"'"<<endl; |
|---|
| 215 | |
|---|
| 216 | string out; |
|---|
| 217 | hc.decode(compressed,out); |
|---|
| 218 | |
|---|
| 219 | cout<<"'"<<out<<"'"<<endl; |
|---|
| 220 | } |
|---|
| 221 | catch(AhuException &ae) { |
|---|
| 222 | cerr<<"Fatal error: "<<ae.reason<<endl; |
|---|
| 223 | } |
|---|
| 224 | } |
|---|
| 225 | #endif |
|---|