The goal is to extract the raw HTML from Internet Explorer using a BHO (Browser Helper Object). When you access the HTML through the mshtml.HTMLDocument (DOM) object, you get a version of the markup that differs from what you see via "right click --> View Source", because IE re-serializes the page from its parsed DOM rather than returning the original text.
The BHO is written and working, and I have even managed to extract the raw html from the browser by following the posts here: http://www.thescripts.com/forum/thread348964.html
I then tried to read in a couple of HTML documents. The smaller document works fine, with no problems at all. With the larger document, the code fails to read the entire page; however, if you run it under the debugger and step through it, it works fine!
In reference to the code below, increasing the buffer size passed to GetStream() makes no difference. If anyone can provide sample code as a solution, I would be so grateful I can't even put it into words!
Expand|Select|Wrap|Line Numbers
// Grab the document currently hosted by the browser and ask it to persist
// itself into an in-memory COM stream.
HTMLDocument document = (HTMLDocument)base.Explorer.Document;
IPersistStreamInit ips = (IPersistStreamInit)document;
IStream strm = RawHtml.GetStream(2048);
// IPersistStreamInit.Save is declared [PreserveSig], so a failure is reported
// as a negative HRESULT rather than an exception. Surface it here instead of
// silently decoding a partially written stream.
System.Runtime.InteropServices.Marshal.ThrowExceptionForHR(ips.Save(strm, false));
string rawhtml = RawHtml.StreamToString(strm);
- [RAWHTML CLASS]
/// <summary>
/// Helpers for creating an in-memory COM stream and turning its contents
/// back into a managed string.
/// </summary>
class RawHtml
{
    /// <summary>
    /// Creates a stream object backed by an HGLOBAL memory block.
    /// PreserveSig = false converts a failing HRESULT into an exception
    /// instead of discarding it.
    /// </summary>
    [DllImport("ole32.dll", PreserveSig = false)]
    public extern static void CreateStreamOnHGlobal(IntPtr hGlobal, bool fDelete, ref IStream stm);

    /// <summary>
    /// Retrieves the HGLOBAL handle behind a stream created by
    /// <see cref="CreateStreamOnHGlobal"/>. The returned value is a handle,
    /// not a pointer: it must be passed to GlobalLock before being read.
    /// </summary>
    [DllImport("ole32.dll", PreserveSig = false)]
    public extern static void GetHGlobalFromStream(IStream stm, ref IntPtr hGlobal);

    /// <summary>
    /// Creates an empty, growable in-memory stream.
    /// </summary>
    /// <param name="size">
    /// Retained for backward compatibility only; the value is ignored.
    /// </param>
    /// <returns>A new, empty stream that frees its memory when released.</returns>
    public static IStream GetStream(int size)
    {
        // CreateStreamOnHGlobal requires a *movable* HGLOBAL. The previous
        // code handed it a fixed block from Marshal.AllocHGlobal, which the
        // stream cannot reliably grow once the document exceeds the initial
        // size. Passing IntPtr.Zero lets OLE allocate a correctly typed,
        // zero-length block that it resizes on demand.
        IStream strm = null;
        CreateStreamOnHGlobal(IntPtr.Zero, true, ref strm);
        return strm;
    }

    /// <summary>
    /// Reads the entire content of <paramref name="strm"/> and decodes it
    /// with the system ANSI code page.
    /// </summary>
    /// <param name="strm">Stream to read; it is rewound to its start first.</param>
    /// <returns>The decoded stream content; an empty string for an empty stream.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="strm"/> is null.</exception>
    public static string StreamToString(IStream strm)
    {
        // NOTE(review): assumes IStream is
        // System.Runtime.InteropServices.ComTypes.IStream (or the
        // signature-compatible UCOMIStream) - confirm against the file's usings.
        if (strm == null)
        {
            throw new ArgumentNullException("strm");
        }

        const int StreamSeekSet = 0; // STREAM_SEEK_SET
        const int ChunkSize = 4096;

        // After Save() the stream position sits at the end of the data.
        strm.Seek(0, StreamSeekSet, IntPtr.Zero);

        byte[] chunk = new byte[ChunkSize];
        // IStream.Read reports the byte count through an unmanaged ULONG*.
        IntPtr bytesReadPtr = Marshal.AllocCoTaskMem(sizeof(int));
        try
        {
            using (System.IO.MemoryStream buffer = new System.IO.MemoryStream())
            {
                while (true)
                {
                    Marshal.WriteInt32(bytesReadPtr, 0);
                    strm.Read(chunk, chunk.Length, bytesReadPtr);

                    int bytesRead = Marshal.ReadInt32(bytesReadPtr);
                    if (bytesRead <= 0)
                    {
                        break;
                    }

                    buffer.Write(chunk, 0, bytesRead);
                }

                // Decode by explicit length. The stream data is not
                // NUL-terminated, so scanning for a terminator (as
                // Marshal.PtrToStringAnsi(IntPtr) does) may stop early or
                // run past the end of the buffer.
                // NOTE(review): the bytes are in whatever encoding the
                // document was saved in; ANSI matches the previous behavior
                // but may be wrong for UTF-8/UTF-16 pages - confirm.
                return System.Text.Encoding.Default.GetString(buffer.ToArray());
            }
        }
        finally
        {
            Marshal.FreeCoTaskMem(bytesReadPtr);
        }
    }
}
/// <summary>
/// Managed declaration of the COM IPersistStreamInit interface, used to
/// load an object from, or save it to, an IStream.
/// Members must stay in this order: it defines the COM vtable layout.
/// </summary>
[ComVisible(false), ComImport(), Guid("7FD52380-4E07-101B-AE2D-08002B2EC713"), InterfaceTypeAttribute(ComInterfaceType.InterfaceIsIUnknown)]
public interface IPersistStreamInit
{
    /// <summary>Retrieves the class identifier (CLSID) of the object.</summary>
    void GetClassID(ref Guid pClassID);

    /// <summary>Returns an HRESULT indicating whether the object changed since it was last saved.</summary>
    [PreserveSig()]
    int IsDirty();

    /// <summary>Initializes the object from <paramref name="pstm"/>; returns an HRESULT.</summary>
    [PreserveSig()]
    int Load(IStream pstm);

    /// <summary>
    /// Saves the object to <paramref name="pstm"/>; returns an HRESULT that
    /// the caller must check, because [PreserveSig] suppresses exceptions.
    /// </summary>
    /// <remarks>
    /// The native parameter is a 4-byte Win32 BOOL. On a COM interface a
    /// managed bool marshals as a 2-byte VARIANT_BOOL by default, so the
    /// marshaling is spelled out explicitly.
    /// </remarks>
    [PreserveSig()]
    int Save(IStream pstm, [MarshalAs(UnmanagedType.Bool)] bool fClearDirty);

    /// <summary>Retrieves the maximum stream size, in bytes, needed to save the object; returns an HRESULT.</summary>
    [PreserveSig()]
    int GetSizeMax([InAttribute(), Out(), MarshalAs(UnmanagedType.U8)] ref long pcbSize);

    /// <summary>Initializes the object to its default state; returns an HRESULT.</summary>
    [PreserveSig()]
    int InitNew();
}
Ad