首页 > 编程资源分享区 > C/C++源代码共享 > 用哈夫曼树实现文件的压缩[非原创]
2006
01-18

用哈夫曼树实现文件的压缩[非原创]




刚写的一个上机实验题,可以模拟压缩文本文件,只是文件中带了数字后就出错了,不知怎么回事?




/*=================================================*/
/*                  哈夫曼树编码                   */
/*用哈夫曼树算法对文本文件进行0-1化编码,并模拟文件*/
/*压缩与解压                                       */
/*作者:踏网无痕                                   */
/*时间:2004/05/27-29                              */
/*=================================================*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <conio.h>

#define MAX_SINGLECODE_LEN 10        //单个字符最大码长
#define MAX_STRING_LEN 1000            //要编码的字符串的最大长度
#define MAX_CODESTRING_LEN 10000             //产生的二进制码的最大长度
#define MAX_WORDS 1000            //要编码的字符串中字符种数最大值
#define END_TREE 30000            //树部分存储的结束符
#define PATH_LEN 50            //路径串最大长度

/*****哈夫曼树结构定义*****/
typedef struct Huffmantree
{
    char ch;                //字符部分
    int weight;                //结点权值
    int    mark;                //标记是否加入树中
    struct Huffmantree *parent,*lchild,*rchild,*next;
}HTNode,*LinkTree;

/*****编码字典结构定义*****/
typedef struct
{
    char ch;                //字符部分
    char code[MAX_SINGLECODE_LEN];    //编码部分
}CodeDictionary;

/*********函数声明*********/
LinkTree setWeight(char *);
LinkTree sortNode(LinkTree);
LinkTree createHTree(LinkTree);
void codeHTree(LinkTree,CodeDictionary *);
void decodeHTree(LinkTree,char *,char *);
void deleteNode(LinkTree);
void compressString(char *s,CodeDictionary *,char *);
void readFile(char *);
void writeFile(char *);
void readCode(LinkTree,char *);
void writeCode(LinkTree,char *);
void menu();

/**
*主函数
*输入:空
*返回:空
*/
void main(void)
{
    char choice;            //菜单选择变量
    char string[MAX_STRING_LEN];    //保存从文件中读取的内容
    LinkTree temp;            //保存赋了权值的表
    LinkTree ht;            //保存排序后的表
    LinkTree htcopy,tempcopy;        //表备份
    LinkTree htree;            //保存哈夫曼树
    LinkTree ptr=NULL;
    CodeDictionary codedictionary[MAX_WORDS];//编码字典
    char codestring[MAX_CODESTRING_LEN];    //保存0-1形的代码串     
    char codestring2[MAX_CODESTRING_LEN];//保存0-1形的代码串
    LinkTree ht2;            //保存读取的树
    LinkTree htree2;            //保存排序后的表
    char filestring[MAX_STRING_LEN];    //解码后要写入文件中的内容

    if((ht2=(LinkTree)malloc(sizeof(HTNode)))==NULL)//创建链表的头结点
    {
        printf(“内存不足!”);
        getch();
        exit(0);
    }
    ht2->next=NULL;

    while(1)
    {
        menu();            //调入主菜单
        choice=getch();        //读入用户选项
        switch(choice)        //判断用户选择
        {
             case ‘c’:
             case ‘C’:
                printf(“\n您选择了压缩文件模式:\n\n”);
                readFile(string);    //读取要编码的文件(字符串)
                temp=setWeight(string);            //得到有权值的表
                 tempcopy=setWeight(strin g);
                ht=sortNode(temp);                //按权值排序后的表
                htcopy=sortNode(tempcopy);        //用于记录解码树
                htree=createHTree(ht);            //得到哈夫曼树
                 code HTree(htree,codedictionary);//哈夫曼编码
                compressString(string,codedictionary,codestring);//压缩为0-1码
                writeCode(htcopy,codestring);    //将解码树和0-1码保存                
                deleteNode(htree);                //释放空间*/
                 break;
             case ‘u’:
             case ‘U’:
                printf(“您选择了解压缩文件模式:\n\n”);
                readCode(ht2,codestring2);        //读取要解码的0-1码
                htree2=createHTree(ht2);        //得到哈夫曼树
                 code HTree(htree2,codedictionary);//哈夫曼编码
                 decodeHTree(htree2,codes tring2,filestring); //解码
                writeFile(filestring);            //将解码文件保存
                deleteNode(htree2);                //释放空间
                 break;
             case ‘e’:
             case ‘E’:
                exit(0);                        //退出程序
        }
    }
}


/**
*整理输入的字符串,求出每个字符在数组中出现的次数,作为权值
*输入:(字符型指针)字符串的地址
*返回:(哈夫曼结点指针)含权链表的首地址
*/
LinkTree setWeight(char *string)
{
    int i=0;                                    //文件字符串下标
    LinkTree tree;                                //头指针
    LinkTree ptr,beforeptr;                        //创建指针与其前驱
    HTNode *node;
    
    if((tree=(LinkTree)malloc(sizeof(HTNode)))==NULL)//创建链表的头结点
        return NULL;
    tree->next=NULL;
    
    for(i=0;string!=’\0′;i++)
    {    
        ptr=tree;
        beforeptr=tr ee;
        
        if((node=(HT Node *)malloc(sizeof(HTNode)))==NULL)
             return NULL;
        node->nex t=NULL;
        node->par ent=NULL;
        node->lch ild=NULL;
        node->rch ild=NULL;
        node->mar k=0;
        node->ch= string;
        node->wei ght=1;
        
        if(tree->next==NULL)                    //如果是第一个非头结点
             tree->next=node;
        else
        {    
             ptr=tree->next;
            while(ptr&&ptr->ch!=node->ch)        //查找相同字符
             {    
                 ptr=ptr->next;
                 beforeptr=beforeptr-> next;
             }
            if(ptr&&ptr->ch==node->ch)            //如果链表中某结点的字符与新结点的字符相同
             {    
                ptr->weight++;                    //将该结点的权加一  
                 free(node);    
             }
            else                                //将新结点插入链表后
             {    
                 node->next=beforeptr- >next;
                 beforeptr->next=node;
             }
        }
    }
    return tree;                                //返回头指针
}

/**
*将整理完的字符串(带权链表)按出现次数从小到大的顺序排列
*输入:(哈夫曼结点指针)要排序的表头地址
*返回:(哈夫曼结点指针)排序后的表头地址
*/
LinkTree sortNode(LinkTree tree)
{    
    LinkTree head;                                //头指针
    LinkTree ph,beforeph;                        //创建指针及其前驱
    LinkTree pt;
    
    if((head=(LinkTree)malloc(sizeof(HTNode)))==NULL)//创建新链表的头结点
        return NULL;
    head->next=NULL;
    
    ph=head;
    beforeph=head;
    
    while(tree->next)
    {    
        pt=tree->next;                            //取被操作链表的头结点
        tree->nex t=pt->next;
        pt->next= NULL;
        
        ph=head-> next;
        beforeph=hea d;
        
        if(head-> next==NULL)
            head->next=pt;                        //创建当前操作链表头结点
        else
        {
            while(ph&&ph->weight<pt->weight)    //将被操作结点插入相应位置
             {    
                 ph=ph->next;
                 beforeph=beforeph->ne xt;
             }
             pt->next=beforeph->next;
             beforeph->next=pt;
        }
    }
    free(tree);
    return head;                                //返回排序后的头指针
}

/**
*用排完序的字符串建立哈夫曼树
*输入:(哈夫曼结点指针)要建立哈夫曼树的地址
*返回:(哈夫曼结点指针)建立后的哈夫曼树地址
*/
LinkTree createHTree(LinkTree tree)
{   
    LinkTree p,q,beforep;
    HTNode *newnode;
    
    for(p=tree->next,q=p->next;p!= NULL&&q!=NULL;p=tree->next,q=p->next)
                                                //p、q初值为头结点后的两个结点,即最小权结点
    {
        tree->nex t=q->next;
        q->next=N ULL;
        p->next=N ULL;
        
        if((newnode= (HTNode *)malloc(sizeof(HTNode)))==NULL)
                                                //申请新结点作为哈夫曼树的中间结点
             return NULL;
        newnode-> next=NULL;
        newnode-> mark=0;
        
        newnode->lchild=p;                        //取链表头结点后的两个结点作为新结点的左、右孩子
        newnode-> rchild=q;
        p->parent =newnode;
        q->parent =newnode;
        newnode->weight=p->weight+q->weight;    //权值相加
        
        p=tree->n ext;
        beforep=tree ;
        
        if(p!=NULL&&p->weight>=newnode->weight)
        {
             newnode->next=beforep->next;         //将新结点插入原链表的相应位置
             beforep->next=newnode;    
        }
        else
        {
             while(p!=NULL&&p->weight<newnode-& gt;weight)
             {    
                 p=p->next;
                 beforep=beforep->next ;
             }
             newnode->next=beforep->next;
             beforep->next=newnode;
        }
    }
    return (tree->next);
}

/**
*对哈夫曼树进行编码
*输入:(哈夫曼结点指针)要编码的哈夫曼树地址
*      (编码字典类型指针)存放字典的首地址
*返回:空
*/
void codeHTree(LinkTree tree,CodeDictionary *codedictionary)
{   
    int index=0,k=0;
    char code[MAX_SINGLECODE_LEN];                //用于统计每个字符的哈夫曼编码
    LinkTree ptr=tree;                            //从树的根结点开始

    if(ptr==NULL)
    {
        printf(“要压缩的文件是空的!\n”);
        exit(0);
    }
    else
    {
        while(ptr-&g t;lchild&&ptr->rchild&&ptr->mark==0)
        {
             while(ptr->lchild&&ptr->lchild-&gt ;mark==0)
             {
                 code [index++]=’0′;                //左支路编码为0
                 ptr=ptr->lchild;
                 if(!ptr-> lchild&&!ptr->rchild)     //如果没有左右孩子,即叶子结点
                  {
                      ptr->mark=1;                 //作标记,表明该字符已被编码
                      code [index]=’\0′;            //编码0-1字符串结束
                      code dictionary[k].ch=ptr->ch;//给字典赋字符值
                      for(index=0;code[index]!=’\0′;index++)
                          code dictionary[k].code[index]=code[index];//给字典赋码值
                      code dictionary[k].code[index]=’\0′;
                      k++;
                      ptr=tree;                     //指针复位
                      index=0;
                  }
             }
             if(ptr->rchild&&ptr->rchild->ma rk==0)
             {
                 ptr=ptr->rchild;
                 code [index++]=’1′;                //右支路编码为1
             }
             if(!ptr-> lchild&&!ptr->rchild)         //如果没有左右孩子,即叶子结点
             {
                 ptr->mark=1;
                 code [index++]=’\0′;
                 code dictionary[k].ch=ptr->ch;    //给字典赋字符值
                 for(index=0;code [index]!=’\0′;index++)
                      code dictionary[k].code[index]=code[index];//给字典赋码值
                 code dictionary[k].code[index]=’\0′;
                 k++;
                 ptr=tree;
                 index=0;
             }
            if(ptr->lchild->mark==1&&ptr->rchild->mark==1)//如果左右孩子都已标记
             {
                 ptr->mark=1;
                 ptr=tree;
                 index=0;
             }
        }
    }
    printf(“\n”);
}

/**
*解码,即将0-1码转化为字符串
*输入:(哈夫曼结点指针)编码树的地址
*      (字符型指针)要解码的0-1字符串地址
*      (字符型指针)解码后的字符串地址
*返回:空
*/
void decodeHTree(LinkTree tree,char *code,char *filestring)
{    
    int i=0,j=0,k=0;
    char *char0_1;
    LinkTree ptr=tree;
    char0_1=(char *)malloc(MAX_SINGLECODE_LEN);    //此数组用于统计输入的0-1序列
    
    printf(“预览解压后的字符:\n”);
    for(j=0,ptr=tree;code !=’\0′&&ptr->lchild&&ptr->rchild;j=0,p tr=tree)
    {
        for(j=0;code !=’\0′&&ptr->lchild&&ptr->rchild;j++,i ++)
        {
             if(code==’0′)
             {    
                 ptr=ptr->lchild;
                 char0_1 [j]=’0′;  
             }
             if(code==’1′)
             {
                 ptr=ptr->rchild;
                 char0_1 [j]=’1′;
             }
        }                        
        if(!ptr-> lchild&&!ptr->rchild)
        {
            printf(“%c”,ptr->ch);                //显示解压后的字符
             filestring[k++]=ptr->ch;            //将字符逐一保存到字符串里
        }
        if(code==’\0′&&ptr->lchild&&ptr->rchild)
        {    
             char0_1[j]=’\0′;
            printf(“\n没有与最后的几个0-1序列:%s相匹配的字符!\n”,char0_1);
             return;
        }
    }
    printf(“\n\n”);
    filestring[k]=’\0′;
    free(char0_1);
}

/**
*释放哈夫曼树所占用的空间
*输入:(哈夫曼结点指针)要释放的结点地址
*返回:空
*/
void deleteNode(LinkTree tree)
{    
    LinkTree ptr=tree;
    if(ptr)
    {    
        deleteNode(p tr->lchild);
        deleteNode(p tr->rchild);
        free(ptr);
    }
}

/**
*将整个字符串转化为0-1的字符串
*输入:(字符型指针)待转化的字符串首地址
*      (编码字典类型指针)字典首地址
*      (字符型指针)接收0-1码串的首地址
*返回:空
*/
void compressString(char *string,CodeDictionary *codedictionary,char *codestring)
{
    int i=0,j=0,k=0,m;

    while(string)                            //整个文件字符串没结束时
    {
        while(string!=codedictionary[j].ch&&j<MAX_WORDS)
                                                //找与对应字符相同的字符
             j++;
        if(string==codedictionary[j].ch)        //如果找到与对应字符相同的字符
             for(m=0;codedictionary [j].code[m];m++,k++)
                 code string[k]=codedictionary[j].code[m];
        j=0;                                    //字典复位
        i++;
    }
    codestring[k]=’\0′;
}

/**
*把指定文件读到字符串中
*输入:(字符型指针)待接收文件的字符串地址
*返回:空
*/
void readFile(char *string)
{
    FILE *fp;
    int i;
    char ch;                                    //记录读入的字符
    char path[PATH_LEN];                        //文本文件的读路径

    printf(“请输入要压缩的文本文件地址:(无需扩展名)”);
    gets(path);
    if((fp=fopen(strcat(path,”.txt”),”r” ))==NULL)
    {
        printf(“\n路径不正确!\n”);
        getch();
        return;
    }

    ch=fgetc(fp);
    for(i=0;ch!=EOF;i++)
    {
        string=ch;
        ch=fgetc(fp) ;
    }
    string=’\0′;
    fclose(fp);
}

/**
*保存编码后的解码树和字符串
*输入:(哈夫曼结点指针)解码树的地址
*      (字符型指针)要保存的0-1码串首地址
*返回:空
*/
void writeCode(LinkTree tree,char *string)
{
    FILE *fp;
    int i;
    int weight;                                    //记录写入的权值
    char ch;                                    //记录写入的字符
    LinkTree p;
    char path[PATH_LEN];                        //0-1码文件的写路径

    printf(“请输入压缩后的保存路径及文件名:(无需扩展名)”);
    gets(path);
    if((fp=fopen(strcat(path,”.yxy”),”w+ “))==NULL)
    {
        printf(“\n文件路径出错!\n”);
        getch();
        return;
    }
    p=tree->next;

    /*解码树部分写入文件前部分*/
    do
    {
        ch=p->ch;
        weight=p-&gt ;weight;
        fprintf(fp,” %c%d”,ch,weight);
        p=p->next ;
    }while(p);
    fprintf(fp,”%c%d”,’^',END_TREE);

    fseek(fp,sizeof(char),1);                    //空出区分位
    
    /*0-1码写入文件后部分*/
    for(i=0;string;i++)
    {
        ch=string;
        fputc(ch,fp) ;
    }
    printf(“\n压缩成功!\n”);
    getch();
    fclose(fp);
}

/**
*读取编码后的0-1字符串
*输入:(哈夫曼结点指针)解码树的地址
*      (字符型指针)要接收的0-1码串首地址
*返回:空
*/
void readCode(LinkTree tree,char *string)
{

    FILE *fp;
    int i;
    int weight;                                    //记录读入的权值
    char ch;                                    //记录读入的字符
    LinkTree ptr,beforeptr;
    char path[PATH_LEN];                        //0-1码文件的读路径

    printf(“请输入要解压的文件路径及文件名:(无需扩展名)”);
    gets(path);
    if((fp=fopen(strcat(path,”.yxy”),”r” ))==NULL)
    {
        printf(“\n文件路径出错!\n”);
        getch();
        return;
    }
    beforeptr=tree;

    /*从文件前部分读出解码树*/
    fscanf(fp,”%c%d”,&ch,&weight );
    while(weight!=END_TREE)
    {
        if((ptr=(Lin kTree)malloc(sizeof(HTNode)))==NULL)
        {
            printf(“内存不足!”);
             getch();
            exit(1);                            //错误出口
        }
        ptr->ch=c h;
        ptr->weig ht=weight;
        ptr->lchi ld=NULL;
        ptr->rchi ld=NULL;
        ptr->pare nt=NULL;
        ptr->mark =0;
        beforeptr-&g t;next=ptr;
        beforeptr=pt r;
         fscanf(fp,”%c%d”,&ch,&weight );
    }
    beforeptr->next=NULL;

    fseek(fp,sizeof(char),1);                    //文件指针定位
    
    /*从文件后部分读出0-1码*/
    ch=fgetc(fp);
    for(i=0;ch!=EOF;i++)
    {
        string=ch;
        ch=fgetc(fp) ;
    }
    string=’\0′;
    fclose(fp);
}

/**
*保存解码后的文件
*输入:(字符型指针)解码后的字符串地址
*返回:空
*/
void writeFile(char *string)
{
    FILE *fp;
    char ch;                                    //记录写入的字符
    int i;
    char path[PATH_LEN];                        //文本文件的写路径

    printf(“请输入解压后的保存路径及文件名:(无需扩展名)”);
    gets(path);
    if((fp=fopen(strcat(path,”.txt”),”w+ “))==NULL)
    {
        printf(“\n文件路径出错!\n”);
        getch();
        return;
    }

    for(i=0;string;i++)
    {
        ch=string;
        fputc(ch,fp) ;
    }
    printf(“\n解压成功!\n”);
    getch();
    fclose(fp);
}

/**
*显示主菜单
*输入:空
*返回:空
*/
void menu()
{
    printf(“\n\n\n\n\n\n”);
    printf(“\t\t —–**   欢迎使用WINYXY压缩工具   **—–”);
    printf(“\n\n\n”);
    printf(“\t\t\t\t<c> 压 缩\n\n”);
    printf(“\t\t\t\t<u> 解 压\n\n”);
    printf(“\t\t\t\t<e> 退 出\n\n”);
    printf(“\n\n请按键选择:\n”);
    printf(“\n\n\n\n\n\n”);
}


留下一个回复