欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页  >  IT编程

如何用Java实现啥夫曼编码

程序员文章站 2023-12-16 23:08:22
大家可能会想,程序和第三方提供了很多压缩方式,何必自己写压缩代码呢?不错,如gzip这样的压缩工具很多,可是在某些情况下(如文本内容小且字符不重复),gzip压缩后会比原始...

大家可能会想,程序和第三方提供了很多压缩方式,何必自己写压缩代码呢?不错,如gzip这样的压缩工具很多,可是在某些情况下(如文本内容小且字符不重复),gzip压缩后会比原始文本还要大。所以在某些特殊情况下用自己的压缩方式可以更优。

大家可能早已忘记了在学校学习的哈夫曼知识,可以先在百度百科了解一下哈夫曼知识:

哈夫曼思想:统计文本字符重复率,求出各字符权值,再构造出一颗最优二叉树(又称哈夫曼树),然后给每个叶子结点生成一个以位(bit)为单位的码值,每个码值不能做为其 他码值的前缀,再将码值合并以每8个生成一个字节。

复制代码 代码如下:

package com.huffman;

/**
 * 结点
 * @author davee
 */
public class node implements comparable<node> {
    int weight;//权值
    node leftchild;//左孩子结点
    node rightchild;//右孩子结点
    string huffcode;
    private boolean isleaf;//是否是叶子
    character value;

    public node(character value, int weight) {
        this.value = value;
        this.weight = weight;
        this.isleaf = true;
    }

    public node(int weight, node leftchild, node rightchild) {
        this.weight = weight;
        this.leftchild = leftchild;
        this.rightchild = rightchild;
    }

    public void increaseweight(int i) {
        weight += i;
    }

    public boolean isleaf() {
        return isleaf;
    }

    @override
    public int compareto(node o) {
        return this.weight - o.weight;
    }
}


复制代码 代码如下:

package com.huffman;

import java.math.biginteger;
import java.util.arraylist;
import java.util.collections;
import java.util.hashmap;
import java.util.map;
import java.util.treemap;

public class huffmantree {
    private boolean debug = false;

    private hashmap<character, node> nodemap;
    private arraylist<node> nodelist;

    public huffmantree() {
        nodemap = new hashmap<character, node>();
        nodelist = new arraylist<node>();
    }

    public void setdebug(boolean debug) {
        this.debug = debug;
    }

    public string decode(map<string, character> codetable, string binary) {
        int begin = 0, end = 1, count = binary.length();
        stringbuffer sb = new stringbuffer();
        while (end <= count) {
            string key = binary.substring(begin, end);
            if (codetable.containskey(key)) {
                sb.append(codetable.get(key));
                begin = end;
            } else {
            }
            end++;
        }
        return sb.tostring();
    }

    public string encode(string origintext) {
        if (origintext == null) return null;

        calculateweight(origintext);

//        if (debug) printnodes(nodelist);

        node root = generatehuffmantree(nodelist);

        generatehuffmancode(root, "");

        if (debug) printnodes(root);

        stringbuffer sb = new stringbuffer();
        for (character key : origintext.tochararray()) {
            sb.append(nodemap.get(key).huffcode);
        }
        if (debug) system.out.println("二进制:"+sb.tostring());

        return sb.tostring();
    }

    /**
     * 计算叶子权值
     * @param text
     */
    private void calculateweight(string text) {
        for (character c : text.tochararray()) {
            if (nodemap.containskey(c)) {
                nodemap.get(c).increaseweight(1);//权值加1
            } else {
                node leafnode = new node(c, 1);
                nodelist.add(leafnode);
                nodemap.put(c, leafnode);
            }
        }
    }

    /**
     * 生成哈夫曼树
     * @param nodes
     */
    private node generatehuffmantree(arraylist<node> nodes) {
        collections.sort(nodes);
        while(nodes.size() > 1) {
            node ln = nodes.remove(0);
            node rn = nodes.remove(0);
            insertsort(nodes, new node(ln.weight + rn.weight, ln, rn));
        }
        node root = nodes.remove(0);
        nodes = null;
        return root;
    }

    /**
     * 插入排序
     * @param sortednodes
     * @param node
     */
    private void insertsort(arraylist<node> sortednodes, node node) {
        if (sortednodes == null) return;

        int weight = node.weight;
        int min = 0, max = sortednodes.size();
        int index;
        if (sortednodes.size() == 0) {
            index = 0;
        } else if (weight < sortednodes.get(min).weight) {
            index = min;//插入到第一个
        } else if (weight >= sortednodes.get(max-1).weight) {
            index = max;//插入到最后
        } else {
            index = max/2;
            for (int i=0, count=max/2; i<=count; i++) {
                if (weight >= sortednodes.get(index-1).weight && weight < sortednodes.get(index).weight) {
                    break;
                } else if (weight < sortednodes.get(index).weight) {
                    max = index;
                } else {
                    min = index;
                }
                index = (max + min)/2;
            }
        }
        sortednodes.add(index, node);
    }

    private void generatehuffmancode(node node, string code) {
        if (node.isleaf()) node.huffcode = code;
        else {
            generatehuffmancode(node.leftchild, code + "0");
            generatehuffmancode(node.rightchild, code + "1");
        }
    }

    /**
     * 生成码表
     * @return
     */
    public map<string, character> getcodetable() {
        map<string, character> map = new hashmap<string, character>();
        for (node node : nodemap.values()) {
            map.put(node.huffcode, node.value);
        }
        return map;
    }

    /**
     * 打印节点信息
     * @param root
     */
    private void printnodes(node root) {
        system.out.println("字符  权值  哈夫码");
        printtree(root);
    }

    private void printtree(node root) {
        if (root.isleaf()) system.out.println((root.value == null ? "   " : root.value)+"    "+root.weight+"    "+(root.huffcode == null ? "" : root.huffcode));
        if (root.leftchild != null) printtree(root.leftchild);
        if (root.rightchild != null) printtree(root.rightchild);
    }

    /**
     * 打印节点信息
     * @param nodes
     */
    private void printnodes(arraylist<node> nodes) {
        system.out.println("字符  权值  哈夫码");
        for (node node : nodes) {
            system.out.println(node.value+"    "+node.weight+"    "+node.huffcode);
        }
    }
}


复制代码 代码如下:

package com.test;

import java.util.map;

import com.huffman.huffutils;
import com.huffman.huffmantree;

public class test {
    public static void main(string[] args) {
        string origintext = "abcdacaha";
        huffmantree huffmantree = new huffmantree();
        huffmantree.setdebug(true);//测试
        string binary = huffmantree.encode(origintext);
        byte[] bytes = huffutils.binary2bytes(binary);
        map<string, character> codetable = huffmantree.getcodetable();
        int lastbytenum = binary.length() % 8;
        system.out.println(bytes.length);
        //将bytes、codetable、 lastbytenum传递到服务器端
        //省略。。。。。。

        /*
                         服务器端解析
                         接收到参数,并转换成bytes、relationmap、 lastbytenum
        */
        string fullbinary = huffutils.bytes2binary(bytes, lastbytenum);
        system.out.println("服务器二进制:"+fullbinary);
        string retrievetext = huffmantree.decode(codetable, fullbinary);
        system.out.println("恢复文本:"+retrievetext);
    }
}

上一篇:

下一篇: