热门:网页模板.net视频教程JQueryMVCjsonExtJs源码示例三级联动JQuery菜单
您现在的位置:.Net中文社区>> XML编程>>正文内容

浅谈XML压缩算法

发布时间:2009年09月23日点击数: 未知

 XML压缩单元测试代码

class Program {  
   public static string XML = @"<?xml version=""1.0"" encoding=""utf-16""?>
    <Customer>
<CustomerID>ALFKI</CustomerID>
<PO>9572658</PO>
<Address AddressType=""work"">
    <Street>One Main Street</Street>
    <City>Anywhere</City>
    <State>NJ</State>
    <Zip>08080</Zip>
</Address>
<Order>
    <OrderID>10966</OrderID >
    <LineItem>
        <ProductID>37</ProductID>
        <UnitPrice>26.50 </UnitPrice>
        <Quantity>8</Quantity>
        <Description>Gravad lax </Description>              
    </LineItem>
    <LineItem>
        <ProductID>56 </ProductID>
        <UnitPrice>38.00</UnitPrice>
        <Quantity>12</Quantity>
        <Description>Gnocchi di nonna Alice</Description>              
    </LineItem>
</Order>      
</Customer>";  
/// <summary>
/// Demo entry point: prints the size of the sample document in UTF-8, after Deflate,
/// after the dictionary-based XML compression, after restoring it, and after applying
/// Deflate on top of the XML compression. Waits for a key press before exiting.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args) {
    XmlZip zip = new XmlZip();

    byte[] bs = Encoding.UTF8.GetBytes(XML);
    Console.WriteLine("原始文件长度:{0}", bs.Length);
    Console.WriteLine("Deflate压缩后长度: {0}", DeflatedLength(bs));

    // The same sample document is used both to build the dictionary and as the payload.
    zip.Init(XML);
    bs = zip.XmlToBytes(XML);
    Console.WriteLine("XML压缩后长度:{0}", bs.Length);
    string str = zip.BytesToXml(bs);
    Console.WriteLine("还原后长度:{0}", Encoding.UTF8.GetByteCount(str));
    Console.WriteLine(str);

    Console.WriteLine("先XML压缩,再Deflate压缩后的长度:{0}", DeflatedLength(bs));
    Console.ReadKey();
}

/// <summary>
/// Returns the number of bytes produced by Deflate-compressing <paramref name="data"/>.
/// </summary>
/// <param name="data">Bytes to compress.</param>
/// <returns>Length of the compressed output in bytes.</returns>
static long DeflatedLength(byte[] data) {
    using (MemoryStream ms = new MemoryStream()) {
        // leaveOpen = true so the MemoryStream can still be measured after the
        // DeflateStream has been disposed (disposing flushes the final block).
        using (DeflateStream deflate = new DeflateStream(ms, CompressionMode.Compress, true)) {
            deflate.Write(data, 0, data.Length);
        }
        return ms.Length;
    }
}

 

测试输出

原始文件长度:740

Deflate压缩后长度: 438

XML压缩后长度:295

还原后长度:727

<?xml version="1.0" encoding="utf-16"?>
<Customer>
  <CustomerID>ALFKI</CustomerID>
  <PO>9572658</PO>
  <Address AddressType="work">
    <Street>One Main Street</Street>
    <City>Anywhere</City>
    <State>NJ</State>
    <Zip>08080</Zip>
  </Address>
  <Order>
    <OrderID>10966</OrderID>
    <LineItem>
      <ProductID>37</ProductID>
      <UnitPrice>26.50 </UnitPrice>
      <Quantity>8</Quantity>
      <Description>Gravad lax </Description>              
    </LineItem>
    <LineItem>
      <ProductID>56 </ProductID>
      <UnitPrice>38.00</UnitPrice>
      <Quantity>12</Quantity>
      <Description>Gnocchi di nonna Alice</Description>              
    </LineItem>
  </Order>
</Customer> 

 

先XML压缩,再Deflate压缩后的长度:357

可以看到,XML压缩后的数据约为原始数据的40%(295/740字节),压缩率可能不如其它专用的压缩算法高,但效果还算令人满意。而且该算法比较通用:只要通信双方知道XML的Schema,甚至只需要各持有一份完整的示例XML文档,就可以进行压缩通信。目前只做了功能测试,没做性能测试,大家可以先借鉴下思路。

完整代码

大致原理:通信双方各持有一个由XML文档的元素名称和属性名称构成的字典,发送方传输时用ushort编号代替原有的XML标签和属性名,接收方再通过字典把ushort编号还原成原始的元素名和属性名,这样就省去了大量不必要的重复标签。

代码仅作为本文的示例,写得比较随意,没有做防御性处理,健壮性也不足。

/// <summary>
/// Kind of XML name that a dictionary entry stands for.
/// </summary>
internal enum ItemType {
    /// <summary>The entry is an element (tag) name.</summary>
    Element = 0,

    // The misspelling is kept on purpose: the member is referenced by this exact name.
    /// <summary>The entry is an attribute name.</summary>
    Attritube = 1
}
/// <summary>
/// One dictionary entry: the path string identifying a name, the name itself,
/// and whether that name belongs to an element or an attribute.
/// </summary>
internal class XmlNodeItem {
    private string _xpath;
    private string _text;
    private ItemType _itemType;

    /// <summary>Path string that identifies this entry.</summary>
    public string Xpath {
        get { return _xpath; }
        set { _xpath = value; }
    }

    /// <summary>Element or attribute name stored for this entry.</summary>
    public string Text {
        get { return _text; }
        set { _text = value; }
    }

    /// <summary>Whether the entry is an element or an attribute.</summary>
    public ItemType ItemType {
        get { return _itemType; }
        set { _itemType = value; }
    }

    /// <summary>Returns the value of <see cref="Xpath"/>.</summary>
    public override string ToString() {
        return _xpath;
    }
}
  
/// <summary>
/// Tracks the current position inside an XML document as a list of path segments
/// and renders it as a string such as <c>/Customer/Address/@AddressType</c>.
/// </summary>
internal class MyXpath {
    // Segments in document order; each one already carries its "/" or "/@" prefix.
    readonly LinkedList<string> _node = new LinkedList<string>();

    /// <summary>Appends an element segment (<c>/name</c>).</summary>
    /// <param name="name">Element name.</param>
    public void AddElement(string name) {
        _node.AddLast(string.Format("/{0}", name));
    }

    /// <summary>Appends an attribute segment (<c>/@name</c>).</summary>
    /// <param name="name">Attribute name.</param>
    public void AddAttribute(string name) {
        _node.AddLast(string.Format("/@{0}", name));
    }

    /// <summary>
    /// Removes the most recently added segment, whether it is an element or an attribute.
    /// </summary>
    /// <exception cref="InvalidOperationException">The path is empty.</exception>
    public void RemoveLastElement() {
        _node.RemoveLast();
    }

    /// <summary>
    /// Concatenates all segments in order.
    /// </summary>
    /// <returns>The full path, or an empty string when no segment has been added.</returns>
    public override string ToString() {
        StringBuilder sb = new StringBuilder();
        // Iterating (instead of dereferencing First) keeps an empty path from throwing.
        foreach (string segment in _node) {
            sb.Append(segment);
        }

        return sb.ToString();
    }
}
  
class XmlZip {  
    Dictionary<ushort, XmlNodeItem> _map = new Dictionary<ushort, XmlNodeItem>();  
    Dictionary<string, ushort> _map2 = new Dictionary<string, ushort>();  
    MyXpath _path = new MyXpath();  

    public void Init(string xmlInput) {  
        StringReader sr = new StringReader(xmlInput);  
        XmlReader reader = XmlReader.Create(sr);  
        MemoryStream ms = new MemoryStream();  
        ushort i = 1;  
        while (reader.Read()) {  
            switch (reader.NodeType) {  
                case XmlNodeType.Element:  
                    _path.AddElement(reader.Name);  
                    _map[i++] = new XmlNodeItem() {  
                        Xpath = _path.ToString(),  
                        Text = reader.Name,  
                        ItemTypeItemType = ItemType.Element  
                    }
;  
                    if (reader.HasAttributes) {  
                        reader.MoveToFirstAttribute();  
                        _path.AddAttribute(reader.Name);  
                        _map[i++] = new XmlNodeItem() {  
                            Xpath = _path.ToString(),  
                            Text = reader.Name,  
                            ItemTypeItemType = ItemType.Attritube  
                        }
;  
                        _path.RemoveLastElement();  
                        while (reader.MoveToNextAttribute()) {  
                            _path.AddAttribute(reader.Name);  
                            _map[i++] = new XmlNodeItem() {  
                                Xpath = _path.ToString(),  
                                Text = reader.Name,  
                                ItemTypeItemType = ItemType.Attritube  
                            }
;  
                            _path.RemoveLastElement();  
                        }
  
                        reader.MoveToElement();  
                    }
  
                    if (reader.IsEmptyElement) _path.RemoveLastElement();  
                    break;  
                case XmlNodeType.EndElement:  
                    _path.RemoveLastElement();  
                    break;  
                default:  
                    break;  
            }
  
        }
  
        foreach (KeyValuePair<ushort, XmlNodeItem> pair in _map) {  
            _map2[pair.Value.Xpath] = pair.Key;  
        }
  
    }
  

    public byte[] XmlToBytes(string xmlInput) {  
        StringReader sr = new StringReader(xmlInput);  
        XmlReader reader = XmlReader.Create(sr);  
        MemoryStream ms = new MemoryStream();  
        BinaryWriter bw = new BinaryWriter(ms);  
        while (reader.Read()) {  
            ushort index;  
            byte[] bs;  
            switch (reader.NodeType) {  
                case XmlNodeType.Element:  
                    _path.AddElement(reader.Name);  
                    if (_map2.TryGetValue(_path.ToString(), out index)) {  
                        bw.Write(index);  
                    }
  
                    if (reader.HasAttributes) {  
                        reader.MoveToFirstAttribute();  
                        _path.AddAttribute(reader.Name);  
                        if (_map2.TryGetValue(_path.ToString(), out index)) {  
                            _path.RemoveLastElement();  
                            bw.Write(index);  
                            bs = Encoding.UTF8.GetBytes(reader.Value);  
                            bw.Write((ushort)bs.Length);  
                            bw.Write(bs);  
                        }
  
                        while (reader.MoveToNextAttribute()) {  
                            _path.AddAttribute(reader.Name);  
                            if (_map2.TryGetValue(_path.ToString(), out index)) {  
                                _path.RemoveLastElement();  
                                bw.Write(index);  
                                bs = Encoding.UTF8.GetBytes(reader.Value);  
                                bw.Write((ushort)bs.Length);  
                                bw.Write(bs);  
                            }
  
                        }
  
                        reader.MoveToElement();  
                    }
  
                    if (reader.IsEmptyElement) {  
                        _path.RemoveLastElement();  
                        bw.Write(ushort.MaxValue);  
                    }
  
                    break;  
                case XmlNodeType.EndElement:  
                    _path.RemoveLastElement();  
                    bw.Write(ushort.MaxValue);  
                    break;  
                case XmlNodeType.Text:  
                    bw.Write((ushort)0);  
                    bs = Encoding.UTF8.GetBytes(reader.Value);  
                    bw.Write((ushort)bs.Length);  
                    bw.Write(bs);  
                    break;  
                default:  
                    break;  
            }
  
        }
  
        bw.Close();  
        ms.Close();  
        reader.Close();  
        return ms.ToArray();  
    }
  

    /// <summary>
    /// Decodes a byte array of ushort-tagged records back into indented XML text.
    /// Each record starts with a ushort flag: an id found in <c>_map</c> starts an element
    /// or writes an attribute, 0 introduces a text value, and <c>ushort.MaxValue</c>
    /// closes the current element.
    /// </summary>
    /// <param name="bytes">Encoded bytes to decode.</param>
    /// <returns>The XML text accumulated by the writer.</returns>
    public string BytesToXml(byte[] bytes) {  
        MemoryStream ms = new MemoryStream(bytes);  
        BinaryReader br = new BinaryReader(ms);  
        StringBuilder sb = new StringBuilder();  
        StringWriter sw = new StringWriter(sb);  
        XmlWriterSettings settings = new XmlWriterSettings();  
        settings.Indent = true;  
        XmlWriter writer = XmlWriter.Create(sw, settings);  

        XmlNodeItem item;  
        // NOTE(review): PeekChar decodes a character using the reader's text encoding;
        // on arbitrary binary data this may misbehave or throw. Comparing the stream
        // position with its length looks like a safer end-of-data test. TODO confirm.
        while (br.PeekChar() != -1) {  
            ushort readFlag = br.ReadUInt16();  
            int len;  
            byte[] bs;  
            string str;  
            if (_map.TryGetValue(readFlag, out item)) {  
                if (item.ItemType == ItemType.Element)  
                    writer.WriteStartElement(item.Text);  
                else if (item.ItemType == ItemType.Attritube) {  
                    // Attribute record: ushort byte count, then the UTF-8 encoded value.
                    len = br.ReadUInt16();  
                    bs = br.ReadBytes(len);  
                    str = Encoding.UTF8.GetString(bs);  
                    writer.WriteAttributeString(item.Text, str);  
                }
  
            }
  
            else if (readFlag == 0) {  
                // Text record: ushort byte count, then the UTF-8 encoded text.
                len = br.ReadUInt16();  
                bs = br.ReadBytes(len);  
                str = Encoding.UTF8.GetString(bs);  
                writer.WriteString(str);  
            }
  
            else if (readFlag == ushort.MaxValue) {  
                writer.WriteEndElement();  
            }
  
            // Any other flag value matches no branch and is skipped without reading a payload.
        }
  
        writer.Flush();  
        writer.Close();  
        sw.Close();  
        br.Close();  
        return sb.ToString();  

本站热点业务

更多模板/案例展示

热门推荐

关于我们 | 联系我们 | 团队日志 | 网站地图 | 网站合作