编程知识 cdmana.com

使用哈夫曼算法对文件进行压缩和解压

使用哈夫曼算法对文件进行解压和压缩

前言:笔者是一名在读大二的学生,用哈夫曼算法对文件进行压缩和解压是老师布置的实验课作业。码这个程序花了很多时间,但是我也在其中学到了很多东西,写这种类似于工程类和写算法的程序感觉有很大的不一样。

part1 原理

对文件内出现的字符进行统计,以出现的次数为关键字,建立哈夫曼树,实现对字符的编码,将字符转换成一个01序列,因为一个字符占用一个字节,8个bit, 而一个字节可以存八位的01编码,所以用位编码替换文件内的字符,既完成了文件的压缩。为了能够解码,我们要把哈夫曼树和01序列一起存进文件中。在这里插入图片描述
哈夫曼树
解码时,我们将存进压缩文件里的哈夫曼树还原,通过文件里的01序列对哈夫曼树进行查找,即可还原出原文件。

part2 压缩代码实现

1.统计文件字符出现次数

统计文本中字符出现频率

int Count(string op, string path1, string path2, int tong[]) {
   
   
        // TODO:统计文件1字符出现频率
        // path1是源文件,path2是目标文件
        ifstream instr(path1, ios::in | ios::binary);
        unsigned int bytebuff = 0;
        char ch;
        while (instr.get(ch)) {
   
   
            bytebuff =(int)(unsigned char)ch;
            tong[bytebuff]++;
        } //统计weight完成!
        int LeafNumber = 0;
        for (int i = 0; i <= 256; i++) {
   
   
            if (tong[i] != 0)
                LeafNumber++;
        }
        instr.close();
        cerr << "Count Completed" << endl;
        return LeafNumber;
    }

使用get()一次从文件输入流里读取一个字节,相当于一个char类型。使用桶计数法统计字符出现次数。这里要用unsigned char,因为有些字符的编码是大于127的,符号位为1,不适用unsigned读出来会是负数。

2.建立哈夫曼树

struct HuffmanNode {
   
   
    int info; //存
    int index;
    int weight;
    int parent; //存
    int left;
    int right;
    char side; //存
    string BinaryCode;
    friend bool operator>(HuffmanNode f1, HuffmanNode f2) {
   
   
        return f1.weight > f2.weight;
    }
};

哈夫曼树的节点信息
个人习惯喜欢把节点名和结构体指针换名字

typedef HuffmanNode Node;
typedef HuffmanNode *Tree;

创建哈夫曼树:

int CreatHuffmanTree(int tong[], int LeafNumber, Node HuffmanTree[]) {
   
   
        // TODO:创建哈夫曼树
        int k = 0;
        priority_queue<Node, vector<Node>, greater<Node>> pq;
        // Tree HuffmanTree = new Node[2 * LeafNumber - 1];
        // 0--->LeafNumber - 1            是叶节点
        // leafNumber--->2*LeafNumber - 2 是根节点
        for (int i = 0; i <= 256; i++) {
   
   
            if (tong[i]) {
   
   
                HuffmanTree[k].info = i;
                HuffmanTree[k].index = k;
                HuffmanTree[k].left = HuffmanTree[k].right =
                    HuffmanTree[k].parent = -1;
                HuffmanTree[k].weight = tong[i];
                pq.push(HuffmanTree[k]);
                k++;
            }
        }
        int j = LeafNumber;
        //通过优先队列构建哈夫曼树
        while (pq.size() > 1) {
   
   
            Node t1 = pq.top();
            pq.pop();
            Node t2 = pq.top();
            pq.pop();
            HuffmanTree[t1.index].parent = j;
            HuffmanTree[t2.index].parent = j;
            HuffmanTree[j].index = j;
            HuffmanTree[j].parent = -1;
            // cout<<HuffmanTree[j].index;
            HuffmanTree[j].left = t1.index;
            // cout<<HuffmanTree[j].left;
            HuffmanTree[j].right = t2.index;
            // cout<<HuffmanTree[j].right;
            HuffmanTree[j].weight = t1.weight + t2.weight;
            HuffmanTree[j].info = -127;
            pq.push(HuffmanTree[j]);
            j++;
        }
        j--;
        Node HuffmanTreeHead = pq.top();
        HuffmanTreeHead.parent = -127;
        HuffmanTree[j] = HuffmanTreeHead;
        HuffmanTree[j].info = -127;
        pq.pop();
        cerr << "Creat Huffman Tree Completed" << endl;
        return 0;
    }

这里使用了stl容器优先队列priority_queue。优先队列,其底层是用堆来实现的。队首一定是当前队列中优先级最高的那一个。
因为哈夫曼树的节点不只有一个信息, 所以要使用优先队列对出现次数weight排序,要在结构体里重载运算符

friend bool operator>(HuffmanNode f1, HuffmanNode f2) {
   
   
        return f1.weight > f2.weight;
    }

3.对出现的字符编码

int GetCodeNode(Node HuffmanTree[], int LeafNumber) {
   
   
        for (int i = 0; i < 2 * LeafNumber - 2; i++) {
   
   
            if (HuffmanTree[i].info == -127)
                continue;
            int IndexForSearching = i;
            HuffmanTree[i].BinaryCode = "";
            int j = 0;
            while (HuffmanTree[IndexForSearching].parent != -127) {
   
   
                j = HuffmanTree[IndexForSearching].parent;
                if (HuffmanTree[j].left == IndexForSearching)
                    HuffmanTree[i].BinaryCode += '0';
                if (HuffmanTree[j].right == IndexForSearching)
                    HuffmanTree[i].BinaryCode += '1';
                IndexForSearching = j;
            }
            reverse(HuffmanTree[i].BinaryCode.begin(),
                    HuffmanTree[i].BinaryCode.end());
        }
        cerr << "Get Node Code Completed" << endl;
        return 0;
    }

目的是为了在下一步对文件进行编码的时候比较简便,但是这样会比较慢

4.按照对文件编码

string Encode(string path1, Node HuffmanTree[], int LeafNumber) {
   
   
        ifstream instr(path1, ios::in | ios::binary);
        char ch;
        unsigned int bytebuff = 0;
        int bitmask = 0x80;
        string HuffmanPath = "";
        while (instr.get(ch)) {
   
   
            bytebuff = (int)(unsigned char)ch;
            int value = bytebuff;
            for (int i = 0; i < LeafNumber; i++) {
   
   
                if (HuffmanTree[i].info == value) {
   
   
                    HuffmanPath += HuffmanTree[i].BinaryCode;
                    break;
                }
            }
        }
        cerr << "Encode Completed" << endl;
        return HuffmanPath;
    }

按照文件中的顺序,把文件的中的字符转换为01序列保存到字符串中。

5.把字符串里的01串转换

字符串里的0和1是以字符来存储的,一个字节存一个0或1,通过位运算,把字节里的八个位都存0和1,这样一个字节就可以存八个0和1

string SwitchStringToBinary(string HuffmanPath, int &Sign) {
   
   
        // TODO:将字符串里的01序列修改为bit
        // 最后一个字节要处理多余的0-->把0放后面
        string BinaryPath = "";
        int bytebuff = 0;
        int shiftcount = 0;
        for (int i = 0; i < HuffmanPath.size(); i++) {
   
   
            bytebuff += (HuffmanPath[i] == '1' ? 1 : 0);
            bytebuff <<= 1;
            shiftcount++;
            if (shiftcount == 8) {
   
   
                bytebuff >>= 1;
                BinaryPath += (char)bytebuff;
                bytebuff = 0;
                shiftcount = 0;
                if (i == HuffmanPath.size() - 1)
                    break;
                if (i + 8 > HuffmanPath.size()) {
   
   
                    i++;
                    while (i <= HuffmanPath.size() - 1) {
   
   
                        bytebuff += (HuffmanPath[i] == '1' ? 1 : 0);
                        bytebuff <<= 1;
                        shiftcount++;
                        i++;
                    }
                    bytebuff <<= 7 - shiftcount;
                    BinaryPath += (char)bytebuff;
                }
            }
        }
        Sign = 8 - shiftcount;
        cerr << "Switch String To Binary Completed" << endl;
        return BinaryPath;
    }

这里写的时候要注意,因为我们把字符串的时候是八个八个的去读,如果字符串的长度 % 8 != 0,那么字符串的末尾是凑不齐八位的,要特殊处理,这里我把有效的01串后面全填满0,补齐8位,然后用一个Sign来标记末尾多添加了几个0,把Sign一并存入到文件当中,这样解码的时候就可以把最后多余的0给处理掉了。

6.存入文件前的准备工作

这里我遇到的问题比较多,也是程序bug出现的主要原因。我们要尽可能的存入少的信息,占用少的空间,把我们的哈夫曼树给存入文件。要存哪些信息以便解码的时候能够完整的还原出来。在尝试了许多种算法以后(文末会介绍),写出了无数多个bug,我使用了比较笨比的方法~:
存节点的 parent(父节点/母节点),side(子节点是父节点的左子节点还是右子节点),info(叶节点对应的ASCII码)
获取节点的side

int GetSide(Node HuffmanTree[], int LeafNumber) {
   
   
        for (int i = 0; i < 2 * LeafNumber - 2; i++) {
   
   
            if (i == HuffmanTree[HuffmanTree[i].parent].left)
                HuffmanTree[i].side = 'l';
            if (i == HuffmanTree[HuffmanTree[i].parent].right)
                HuffmanTree[i].side = 'r';
        }
        return 0;
    }

7.写入文件

int WriteToFile(string path2, Node HuffmanTree[], int LeafNumber,
                    string BinaryPath, int Sign) {
   
   
        // path2是要写的目标文件
        //打开二进制文件输出流
        // ShowTable(HuffmanTree, LeafNumber);
        LeafNumber -= 1;
        ofstream outstr(path2, ios::binary);
        outstr.write(reinterpret_cast<char *>(&Sign), sizeof(char));
        outstr.write(reinterpret_cast<char *>(&LeafNumber), sizeof(char));
        LeafNumber += 1;
        for (int i = 0; i < LeafNumber; i++) {
   
   
            //获取要写的内容的地址,转换为char*
            HuffmanTree[i].parent -= LeafNumber;
            outstr.write(reinterpret_cast<char *>(&HuffmanTree[i].info),
                         sizeof(char));
            outstr.write(reinterpret_cast<char *>(&HuffmanTree[i].parent),
                         sizeof(char));
            outstr.write(reinterpret_cast<char *>(&HuffmanTree[i].side),
                         sizeof(char));
        }
        for (int i = LeafNumber; i < 2 * LeafNumber - 1; i++) {
   
   
            HuffmanTree[i].parent -= LeafNumber;
            outstr.write(reinterpret_cast<char *>(&HuffmanTree[i].parent),
                         sizeof(char));
            outstr.write(reinterpret_cast<char *>(&HuffmanTree[i].side),
                         sizeof(char));
        }
        for (int i = 0; i < BinaryPath.size(); i++) {
   
   
            outstr.put(BinaryPath[i]);
        }
        outstr.close();
        cerr << "Write To File Completed" << endl;
        return 0;
    }

把要写的树信息和编码一并写入文件当中,我在编码头写入了我哈夫曼树的叶节点个数LeafNumber,还有上文中提到的Sign。接着就是哈夫曼树和文件的编码。

part3.解压代码实现

写完压缩以后,解压基本上是一马平川~。但是解压会遇到很多问题,要去压缩的代码里面去改。

1.读文件

Tree ReadFile(string path1, unsigned int &LeafNumber, string &SearchPath,
                  int &Sign) {
   
   
        // Sign是要删去末尾的几个0
        ifstream instr(path1, ios::in | ios::binary);
        SearchPath = "";
        char ch;
        instr.get(ch);
        Sign = ch;
        instr.get(ch);
        LeafNumber = (int)(unsigned char)ch;
        LeafNumber += 1;
        Tree HuffmanTree = new Node[2 * LeafNumber - 1];
        unsigned int num;
        for (int i = 0; i < LeafNumber; i++) {
   
   
            instr.get(ch);
            num = (int)(unsigned char)ch;
            HuffmanTree[i].info = num;
            instr.get(ch);
            num = (int)(unsigned char)ch;
            HuffmanTree[i].parent = num + LeafNumber;
            instr.get(ch);
            HuffmanTree[i].side = ch;
            //初始化其他信息
            HuffmanTree[i].BinaryCode = "";
            HuffmanTree[i].index = i;
            HuffmanTree[i].left = -1;
            HuffmanTree[i].right = -1;
            HuffmanTree[i].weight = -1;
        }
        for (int i = LeafNumber; i < 2 * LeafNumber - 1; i++) {
   
   
            instr.get(ch);
            num = (int)(unsigned char)ch;
            HuffmanTree[i].parent = num + LeafNumber;
            instr.get(ch);
            HuffmanTree[i].side = ch;
            //初始化其他信息
            HuffmanTree[i].info = -10000;
            HuffmanTree[i].BinaryCode = "";
            HuffmanTree[i].index = i;
            HuffmanTree[i].left = -1;
            HuffmanTree[i].right = -1;
            HuffmanTree[i].weight = -1;
        }
        int bitmask = 0x80;
        while (instr.get(ch)) {
   
   
            num = (int)(unsigned char)ch;
            while (bitmask != 0) {
   
   
                if ((bitmask & num) != 0) {
   
   
                    SearchPath += '1';
                }
                if ((bitmask & num) == 0) {
   
   
                    SearchPath += '0';
                }
                bitmask >>= 1;
            }
            bitmask = 0x80;
        }
        SearchPath.erase(SearchPath.end() - Sign, SearchPath.end());
        instr.close();
        cerr << "Read File Completed" << endl;
        return HuffmanTree;
    }

把整个压缩文件都读完,开辟空间保存树节点空间。把文件的编码部分保存到string方便使用。记得使用Sign把编码末尾多余的0去掉。

2.还原哈夫曼树

把刚刚存入线性表的节点建立父子(母子/父女/母女)关系

int BuiltHuffmanTree(Node HuffmanTree[], int LeafNumber) {
   
   
        for (int i = 0; i < 2 * LeafNumber - 2; i++) {
   
   
            if (HuffmanTree[i].side == 'l') {
   
   
                HuffmanTree[HuffmanTree[i].parent].left = i;
            }
            if (HuffmanTree[i].side == 'r') {
   
   
                HuffmanTree[HuffmanTree[i].parent].right = i;
            }
        }
        cerr << "Built Huffman Tree Completed" << endl;
        return 0;
    }

3.还原文件

根据字符串中的01序列,在哈夫曼树中查找,把找到的字符写入文件中就好啦!

int RestoreFile(string SearchPath, Node HuffmanTree[], string path2,
                    int LeafNumber) {
   
   
        ofstream outstr(path2, ios::out | ios::binary);
        int head = 2 * LeafNumber - 2, now = head;
        for (int i = 0; i < SearchPath.size(); i++) {
   
   
            if (SearchPath[i] == '0') {
   
   
                now = HuffmanTree[now].left;
            }
            if (SearchPath[i] == '1') {
   
   
                now = HuffmanTree[now].right;
            }
            if (HuffmanTree[now].left == -1 && HuffmanTree[now].right == -1) {
   
   
                char res = HuffmanTree[now].info;
                outstr.put(res);
                now = head;
            }
        }
        outstr.close();
        cerr << "Restore File Completed" << endl;
        return 0;
    }

part4.全部代码

// writen by spln
// spln@foxmail.com
#include <bits/stdc++.h>
using namespace std;
struct HuffmanNode {
   
   
    int info; //存
    int index;
    int weight;
    int parent; //存
    int left;
    int right;
    char side; //存
    string BinaryCode;
    friend bool operator>(HuffmanNode f1, HuffmanNode f2) {
   
   
        return f1.weight > f2.weight;
    }
};
typedef HuffmanNode Node;
typedef HuffmanNode *Tree;
int ShowHelp() {
   
   
    cerr << "输入错误,请按要求进行输入:" << endl;
    cerr << "-z/-x 文件名1 文件名2" << endl;
    return 0;
}
class Compression {
   
   
  public:
    int CreatHuffmanTree(int tong[], int LeafNumber, Node HuffmanTree[]) {
   
   
        // TODO:创建哈夫曼树
        int k = 0;
        priority_queue<Node, vector<Node>, greater<Node>> pq;
        // Tree HuffmanTree = new Node[2 * LeafNumber - 1];
        // 0--->LeafNumber - 1            是叶节点
        // laefNumber--->2*LeafNumber - 2 是根节点
        for (int i = 0; i <= 256; i++) {
   
   
            if (tong[i]) {
   
   
                HuffmanTree[k].info = i;
                HuffmanTree[k].index = k;
                HuffmanTree[k].left = HuffmanTree[k].right =
                    HuffmanTree[k].parent = -1;
                HuffmanTree[k].weight = tong[i];
                pq.push(HuffmanTree[k]);
                k++;
            }
        }
        int j = LeafNumber;
        //通过优先队列构建哈夫曼树
        while (pq.size() > 1) {
   
   
            Node t1 = pq.top();
            pq.pop();
            Node t2 = pq.top();
            pq.pop();
            HuffmanTree[t1.index].parent = j;
            HuffmanTree[t2.index].parent = j;
            HuffmanTree[j].index = j;
            HuffmanTree[j].parent = -1;
            // cout<<HuffmanTree[j].index;
            HuffmanTree[j].left = t1.index;
            // cout<<HuffmanTree[j].left;
            HuffmanTree[j].right = t2.index;
            // cout<<HuffmanTree[j].right;
            HuffmanTree[j].weight = t1.weight + t2.weight;
            HuffmanTree[j].info = -127;
            pq.push(HuffmanTree[j]);
            j++;
        }
        j--;
        Node HuffmanTreeHead = pq.top();
        HuffmanTreeHead.parent = -127;
        HuffmanTree[j] = HuffmanTreeHead;
        HuffmanTree[j].info = -127;
        pq.pop();
        cerr << "Creat Huffman Tree Completed" << endl;
        return 0;
    }
    int GetCodeNode(Node HuffmanTree[], int LeafNumber) {
   
   
        for (int i = 0; i < 2 * LeafNumber - 2; i++) {
   
   
            if (HuffmanTree[i].info == -127)
                continue;
            int IndexForSearching = i;
            HuffmanTree[i].BinaryCode = "";
            int j = 0;
            while (HuffmanTree[IndexForSearching].parent != -127) {
   
   
                j = HuffmanTree[IndexForSearching].parent;
                if (HuffmanTree[j].left == IndexForSearching)
                    HuffmanTree[i].BinaryCode += '0';
                if (HuffmanTree[j].right == IndexForSearching)
                    HuffmanTree[i].BinaryCode += '1';
                IndexForSearching = j;
            }
            reverse(HuffmanTree[i].BinaryCode.begin(),
                    HuffmanTree[i].BinaryCode.end());
        }
        cerr << "Get Node Code Completed" << endl;
        return 0;
    }
    string Encode(string path1, Node HuffmanTree[], int LeafNumber) {
   
   
        ifstream instr(path1, ios::in | ios::binary);
        char ch;
        unsigned int bytebuff = 0;
        int bitmask = 0x80;
        string HuffmanPath = "";
        while (instr.get(ch)) {
   
   
            bytebuff = (int)(unsigned char)ch;
            int value = bytebuff;
            for (int i = 0; i < LeafNumber; i++) {
   
   
                if (HuffmanTree[i].info == value) {
   
   
                    HuffmanPath += HuffmanTree[i].BinaryCode;
                    break;
                }
            }
        }
        cerr << "Encode Completed" << endl;
        return HuffmanPath;
    }
    int ShowTable(Node HuffmanTree[], int LeafNumber) {
   
   
        for (int i = 0; i < 2 * LeafNumber - 1; i++) {
   
   
            cout << "i:" << i << endl;
            cout << HuffmanTree[i].index << "<-index" << endl;
            cout << HuffmanTree[i].info << "<-info" << endl;
            cout << HuffmanTree[i].side << "<-side" << endl;
            cout << HuffmanTree[i].left << "<-left" << endl;
            cout << HuffmanTree[i].right << "<-right" << endl;
            cout << HuffmanTree[i].parent << "<-parent" << endl;
            cout << HuffmanTree[i].weight << "<-weight" << endl;
            cout << HuffmanTree[i].BinaryCode << "<-code" << endl;
        }
        return 0;
    }
    int Count(string op, string path1, string path2, int tong[]) {
   
   
        // TODO:统计文件1字符出现频率
        // path1是源文件,path2是目标文件
        ifstream instr(path1, ios::in | ios::binary);
        unsigned int bytebuff = 0;
        char ch;
        while (instr.get(ch)) {
   
   
            bytebuff =(int)(unsigned char)ch;
            tong[bytebuff]++;
        } //统计weight完成!
        int LeafNumber = 0;
        for (int i = 0; i <= 256; i++) {
   
   
            if (tong[i] != 0)
                LeafNumber++;
        }
        instr.close();
        cerr << "Count Completed" << endl;
        return LeafNumber;
    }
    //把哈夫曼树存到FinalOutputString
    string SwitchStringToBinary(string HuffmanPath, int &Sign) {
   
   
        // TODO:将字符串里的01序列修改为bit
        // 最后一个字节要处理多余的0-->把0放后面
        string BinaryPath = "";
        int bytebuff = 0;
        int shiftcount = 0;
        for (int i = 0; i < HuffmanPath.size(); i++) {
   
   
            bytebuff += (HuffmanPath[i] == '1' ? 1 : 0);
            bytebuff <<= 1;
            shiftcount++;
            if (shiftcount == 8) {
   
   
                bytebuff >>= 1;
                BinaryPath += (char)bytebuff;
                bytebuff = 0;
                shiftcount = 0;
                if (i == HuffmanPath.size() - 1)
                    break;
                if (i + 8 > HuffmanPath.size()) {
   
   
                    i++;
                    while (i <= HuffmanPath.size() - 1) {
   
   
                        bytebuff += (HuffmanPath[i] == '1' ? 1 : 0);
                        bytebuff <<= 1;
                        shiftcount++;
                        i++;
                    }
                    bytebuff <<= 7 - shiftcount;
                    BinaryPath += (char)bytebuff;
                }
            }
        }
        Sign = 8 - shiftcount;
        cerr << "Switch String To Binary Completed" << endl;
        return BinaryPath;
    }
    int GetSide(Node HuffmanTree[], int LeafNumber) {
   
   
        for (int i = 0; i < 2 * LeafNumber - 2; i++) {
   
   
            if (i == HuffmanTree[HuffmanTree[i].parent].left)
                HuffmanTree[i].side = 'l';
            if (i == HuffmanTree[HuffmanTree[i].parent].right)
                HuffmanTree[i].side = 'r';
        }
        return 0;
    }
    int WriteToFile(string path2, Node HuffmanTree[], int LeafNumber,
                    string BinaryPath, int Sign) {
   
   
        // path2是要写的目标文件
        //打开二进制文件输出流
        // ShowTable(HuffmanTree, LeafNumber);
        LeafNumber -= 1;
        ofstream outstr(path2, ios::binary);
        outstr.write(reinterpret_cast<char *>(&Sign), sizeof(char));
        outstr.write(reinterpret_cast<char *>(&LeafNumber), sizeof(char));
        LeafNumber += 1;
        for (int i = 0; i < LeafNumber; i++) {
   
   
            //获取要写的内容的地址,转换为char*
            HuffmanTree[i].parent -= LeafNumber;
            outstr.write(reinterpret_cast<char *>(&HuffmanTree[i].info),
                         sizeof(char));
            outstr.write(reinterpret_cast<char *>(&HuffmanTree[i].parent),
                         sizeof(char));
            outstr.write(reinterpret_cast<char *>(&HuffmanTree[i].side),
                         sizeof(char));
        }
        for (int i = LeafNumber; i < 2 * LeafNumber - 1; i++) {
   
   
            HuffmanTree[i].parent -= LeafNumber;
            outstr.write(reinterpret_cast<char *>(&HuffmanTree[i].parent),
                         sizeof(char));
            outstr.write(reinterpret_cast<char *>(&HuffmanTree[i].side),
                         sizeof(char));
        }
        for (int i = 0; i < BinaryPath.size(); i++) {
   
   
            outstr.put(BinaryPath[i]);
        }
        outstr.close();
        cerr << "Write To File Completed" << endl;
        return 0;
    }
};
class Decompression {
   
   
  public:
    int ShowTable(Node HuffmanTree[], int LeafNumber) {
   
   
        for (int i = 0; i < 2 * LeafNumber - 1; i++) {
   
   
            cout << "i:" << i << endl;
            cout << HuffmanTree[i].index << "<-index" << endl;
            cout << HuffmanTree[i].info << "<-info" << endl;
            cout << HuffmanTree[i].side << "<-side" << endl;
            cout << HuffmanTree[i].left << "<-left" << endl;
            cout << HuffmanTree[i].right << "<-right" << endl;
            cout << HuffmanTree[i].parent << "<-parent" << endl;
        }
        return 0;
    }
    Tree ReadFile(string path1, unsigned int &LeafNumber, string &SearchPath,
                  int &Sign) {
   
   
        // Sign是要删去末尾的几个0
        ifstream instr(path1, ios::in | ios::binary);
        SearchPath = "";
        char ch;
        instr.get(ch);
        Sign = ch;
        instr.get(ch);
        LeafNumber = (int)(unsigned char)ch;
        LeafNumber += 1;
        Tree HuffmanTree = new Node[2 * LeafNumber - 1];
        unsigned int num;
        for (int i = 0; i < LeafNumber; i++) {
   
   
            instr.get(ch);
            num = (int)(unsigned char)ch;
            HuffmanTree[i].info = num;
            instr.get(ch);
            num = (int)(unsigned char)ch;
            HuffmanTree[i].parent = num + LeafNumber;
            instr.get(ch);
            HuffmanTree[i].side = ch;
            //初始化其他信息
            HuffmanTree[i].BinaryCode = "";
            HuffmanTree[i].index = i;
            HuffmanTree[i].left = -1;
            HuffmanTree[i].right = -1;
            HuffmanTree[i].weight = -1;
        }
        for (int i = LeafNumber; i < 2 * LeafNumber - 1; i++) {
   
   
            instr.get(ch);
            num = (int)(unsigned char)ch;
            HuffmanTree[i].parent = num + LeafNumber;
            instr.get(ch);
            HuffmanTree[i].side = ch;
            //初始化其他信息
            HuffmanTree[i].info = -10000;
            HuffmanTree[i].BinaryCode = "";
            HuffmanTree[i].index = i;
            HuffmanTree[i].left = -1;
            HuffmanTree[i].right = -1;
            HuffmanTree[i].weight = -1;
        }
        int bitmask = 0x80;
        while (instr.get(ch)) {
   
   
            num = (int)(unsigned char)ch;
            while (bitmask != 0) {
   
   
                if ((bitmask & num) != 0) {
   
   
                    SearchPath += '1';
                }
                if ((bitmask & num) == 0) {
   
   
                    SearchPath += '0';
                }
                bitmask >>= 1;
            }
            bitmask = 0x80;
        }
        SearchPath.erase(SearchPath.end() - Sign, SearchPath.end());
        instr.close();
        cerr << "Read File Completed" << endl;
        return HuffmanTree;
    }
    int BuiltHuffmanTree(Node HuffmanTree[], int LeafNumber) {
   
   
        for (int i = 0; i < 2 * LeafNumber - 2; i++) {
   
   
            if (HuffmanTree[i].side == 'l') {
   
   
                HuffmanTree[HuffmanTree[i].parent].left = i;
            }
            if (HuffmanTree[i].side == 'r') {
   
   
                HuffmanTree[HuffmanTree[i].parent].right = i;
            }
        }
        cerr << "Built Huffman Tree Completed" << endl;
        return 0;
    }
    int RestoreFile(string SearchPath, Node HuffmanTree[], string path2,
                    int LeafNumber) {
   
   
        ofstream outstr(path2, ios::out | ios::binary);
        int head = 2 * LeafNumber - 2, now = head;
        for (int i = 0; i < SearchPath.size(); i++) {
   
   
            if (SearchPath[i] == '0') {
   
   
                now = HuffmanTree[now].left;
            }
            if (SearchPath[i] == '1') {
   
   
                now = HuffmanTree[now].right;
            }
            if (HuffmanTree[now].left == -1 && HuffmanTree[now].right == -1) {
   
   
                char res = HuffmanTree[now].info;
                outstr.put(res);
                now = head;
            }
        }
        outstr.close();
        cerr << "Restore File Completed" << endl;
        return 0;
    }
};
//解析命令行:
int main(int argc, char *argv[]) {
   
   
    // //定义类
    Compression Compress;
    Decompression Decompress;
    if (argc != 4)
        ShowHelp();
    else if (stricmp(argv[1], "-z") == 0)
        cerr << "Zip " << argv[2] << " to " << argv[3] << " ..." << endl;
    else if (stricmp(argv[1], "-x") == 0)
        cerr << "Extract " << argv[2] << " to " << argv[3] << " ..." << endl;
    else {
   
   
        ShowHelp();
        return 0;
    }
    //把路径赋值给字符串
    const string op = argv[1];
    const string path1 = argv[2];
    const string path2 = argv[3];
    ifstream instr(path1, ios::in | ios::binary);
    if (!instr) {
   
   
        cerr << "Open File failed" << endl;
        return 0;
    }
    //路径分配
    if (op == "-z") {
   
   
        // cerr << "Ziping..." << endl;
        int tong[257] = {
   
   0};
        int LeafNumber =
            Compress.Count(op, path1, path2, tong); //统计字符出出现频率
        Tree HuffmanTree = new Node[2 * LeafNumber - 1];          //初始化树
        Compress.CreatHuffmanTree(tong, LeafNumber, HuffmanTree); //建树
        Compress.GetSide(HuffmanTree, LeafNumber);
        Compress.GetCodeNode(HuffmanTree, LeafNumber); //获得叶节点的编码
        // Compress.ShowTable(HuffmanTree, LeafNumber);
        string HuffmanPath =
            Compress.Encode(path1, HuffmanTree, LeafNumber); //文件进行编码
        int Sign;
        string BinaryPath =
            Compress.SwitchStringToBinary(HuffmanPath, Sign); //获得二进制串
        Compress.WriteToFile(path2, HuffmanTree, LeafNumber, BinaryPath,
                             Sign); //把哈夫曼树写入文件
        // Compress.ShowTable(HuffmanTree, LeafNumber);
        cerr << "Compression Completed" << endl;
        return 0;
    } 
    else if (op == "-x") {
   
   
        unsigned int LeafNumber;
        int Sign;
        string SearchPath;
        Tree HuffmanTree =
            Decompress.ReadFile(path1, LeafNumber, SearchPath, Sign);
        Decompress.BuiltHuffmanTree(HuffmanTree, LeafNumber);
        Decompress.RestoreFile(SearchPath, HuffmanTree, path2, LeafNumber);
        // Decompress.ShowTable(HuffmanTree, LeafNumber);
        cerr << "Decompress Completed" << endl;
    } else
        return 0;
    return 0;
}

part5.写在最后

其实我写完这些代码以后我感觉我使用的一些算法并不是很好,但是如果要改的话基本上就是重构代码了,工程量巨大。
给大家介绍一个大手子同学使用的算法:不把哈夫曼树写入文件,而是把一个类似于python里字典的东西存进文件:存入每个字符对应的编码长度和编码。这样在解压的时候可以构建一个map,就可以把文件还原。
老师的算法是:直接开辟长度为512的线性表,每个节点对应的下标就是对应的ASCII码。在文件中只存入父节点,经过处理,以下标大小来区分同一个父节点的左右子节点。

欢迎技术交流!!!
如果有更好的算法欢迎讨论!

最后,放张图大家笑一笑
在这里插入图片描述

版权声明
本文为[osc_hjtv1vkc]所创,转载请带上原文链接,感谢
https://my.oschina.net/u/4284510/blog/4837117

Tags c++ Python
Scroll to Top