文章详情

  • 游戏榜单
  • 软件榜单
关闭导航
热搜榜
热门下载
热门标签
php爱好者> php文档>Html convert to DOM Tree

Html convert to DOM Tree

时间:2010-09-23  来源:_Amo.Jry

添加引用 Microsoft.mshtml

并且在项目属性中勾选"允许不安全代码" (代码中使用了 unsafe 关键字)

但是感觉当 html 内容不规范的时候, 程序可能会崩溃.

OMG, 是不是能够有更好的方式来解析这个html内容呢>>> 望见文者, 推荐推荐. 谢谢

 

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using mshtml;
using System.Runtime.InteropServices;

namespace Html2DOMTree
{
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        private void button1_Click(object sender, EventArgs e)
        {
            string html = "";
            if (richTextBox1.Text != "")
            {
                html = richTextBox1.Text;   //取得html源代码
            }

            IHTMLDocument2 doc2 = Parse(html);
            IHTMLDocument3 HTMLDocument = (IHTMLDocument3)doc2;  //doc2对象转换成HTMLDocument对象
            IHTMLDOMNode rootDomNode = null;

            rootDomNode = (IHTMLDOMNode)HTMLDocument.documentElement; //获取文档根部节点,也就是HTML节点

            TreeNode root = treeView1.Nodes.Add("HTML");     ////加入跟节点
            InsertDOMNodes(rootDomNode, root); //把domnode插入到跟节点中,调用InsertDOMNodes方法

        }

         //解析Dom树
        unsafe IHTMLDocument2 Parse(string s)       //unsafe关键字表示不安全上下文
        {
            IHTMLDocument2 pDocument = new HTMLDocumentClass();
            if (pDocument != null)
            {
                IPersistStreamInit pPersist = pDocument as IPersistStreamInit;  //as运算符类似于强制转换操作;如果转换不可行,as会返回null而不是引发异常。
                pPersist.InitNew();
                pPersist = null;
                IMarkupServices ms = pDocument as IMarkupServices;
                if (ms != null)
                {
                    IMarkupContainer pMC = null;
                    IMarkupPointer pStart, pEnd;
                    ms.CreateMarkupPointer(out pStart);
                    ms.CreateMarkupPointer(out pEnd);
                    System.Text.StringBuilder sb = new System.Text.StringBuilder(s);
                    IntPtr pSource = Marshal.StringToHGlobalUni(s);
                    ms.ParseString(ref *(ushort*)pSource.ToPointer(), 0, out pMC, pStart, pEnd);
                    if (pMC != null)
                    {
                        Marshal.Release(pSource);
                        return pMC as IHTMLDocument2;
                    }
                    Marshal.Release(pSource);
                }
            }
            return null;
        }
        //插入Dom树
        public void InsertDOMNodes(IHTMLDOMNode parentnode, TreeNode tree_node)
        {
            if (parentnode.hasChildNodes())//是否有子结点
            {
                IHTMLDOMChildrenCollection allchild = (IHTMLDOMChildrenCollection)parentnode.childNodes;
                int length = allchild.length;
                for (int i = 0; i < length; i++)//对每个子结点进行处理,首先取出每个子节点的属性,然后进行递归
                {
                    IHTMLDOMNode child_node = (IHTMLDOMNode)allchild.item(i);
                    string m_snodeName = child_node.nodeName;
                    object m_onodevalue = child_node.nodeValue;
                    string m_snodetype = child_node.nodeType.ToString();
                    string m_snodevalue = "";
                    if (m_onodevalue != null)
                        m_snodevalue = m_onodevalue.ToString().Trim();
                    TreeNode tempnode = null;

                    if (child_node.nodeName.Equals("#text"))
                    {
                        if ((m_snodevalue != null) && (!m_snodevalue.Equals("")))
                        {
                            tempnode = tree_node.Nodes.Add(m_snodevalue);
                        }
                    }
                    else
                    {
                        tempnode = tree_node.Nodes.Add(child_node.nodeName);
                        InsertDOMNodes(child_node, tempnode);
                    }
                }
            }
        } 
    }

    /// <summary>
    /// Managed declaration of the COM interface with IID
    /// 7FD52380-4E07-101B-AE2D-08002B2EC713, imported as an IUnknown-based interface.
    /// In this file only <see cref="InitNew"/> is called (from Form1.Parse) to
    /// initialise a freshly created MSHTML document.
    /// </summary>
    /// <remarks>
    /// This is a COM import: the order of the members and their marshalling
    /// attributes form the binary contract, so do not reorder or restyle them.
    /// </remarks>
    [ComVisible(true), ComImport(), Guid("7FD52380-4E07-101B-AE2D-08002B2EC713"), InterfaceTypeAttribute(ComInterfaceType.InterfaceIsIUnknown)]
    public interface IPersistStreamInit
    {
        /// <summary>Receives the class identifier of the object into <paramref name="pClassID"/>.</summary>
        void GetClassID([In, Out] ref Guid pClassID);
        /// <summary>
        /// Returns the raw 32-bit result of the native call; PreserveSig keeps the
        /// runtime from converting it into an exception.
        /// </summary>
        [return: MarshalAs(UnmanagedType.I4)]
        [PreserveSig]
        int IsDirty();
        /// <summary>Loads the object's state from the stream <paramref name="pstm"/>.</summary>
        // NOTE(review): UCOMIStream is presumably the legacy stream interop type that later
        // frameworks mark obsolete in favour of ComTypes.IStream - confirm before migrating.
        void Load([In, MarshalAs(UnmanagedType.Interface)] UCOMIStream pstm);
        /// <summary>
        /// Saves the object's state to the stream <paramref name="pstm"/>;
        /// <paramref name="fClearDirty"/> is passed as a 32-bit integer flag.
        /// </summary>
        void Save([In, MarshalAs(UnmanagedType.Interface)] UCOMIStream pstm,
         [In, MarshalAs(UnmanagedType.I4)] int fClearDirty);
        /// <summary>Intended to report the maximum size needed to save the object.</summary>
        // NOTE(review): pcbSize is declared as a by-value long marshalled as LPArray, which
        // looks wrong for an output size (an out/ref 64-bit value would be expected) -
        // verify the marshalling before calling this member; nothing in this file calls it.
        void GetSizeMax([Out, MarshalAs(UnmanagedType.LPArray)] long pcbSize);
        /// <summary>Initialises the object to a default, newly created state.</summary>
        void InitNew();
    }
}

相关阅读 更多 +
排行榜 更多 +
辰域智控app

辰域智控app

系统工具 下载
网医联盟app

网医联盟app

运动健身 下载
汇丰汇选App

汇丰汇选App

金融理财 下载