Many times when using the WebBrowser control, you may want to get the underlying cached file instead of using "document.innerHTML". Using this technique avoids the HTML/JavaScript markup that IE injects into the XML file for display. The key to doing this is using the function called GetUrlCacheEntryInfo. Following is how you would call it from managed code.
[UPDATE] Using the cached file may be the best way in some cases, but another alternative is to access the "XMLDocument" expando property that IE attaches to the document element when browsing to XML documents. Read the "Straight to XML" section of this MSDN page for more details.
''' <summary>
''' Looks up the WinINet cache entry for a URL and returns the path of the
''' local file that backs it.
''' </summary>
''' <param name="fileUrl">The URL whose cache entry should be looked up.</param>
''' <returns>
''' The local file name stored in the cache entry, or Nothing when
''' GetUrlCacheEntryInfo reports ERROR_FILE_NOT_FOUND for the URL.
''' </returns>
''' <exception cref="System.ComponentModel.Win32Exception">
''' GetUrlCacheEntryInfo failed for any reason other than a missing entry.
''' </exception>
Public Shared Function GetPathForCachedFile(ByVal fileUrl As String) As String
    'error the sizing call is expected to fail with; any other failure is a real error
    Const ERROR_INSUFFICIENT_BUFFER As Integer = 122

    Dim cacheEntryInfoBufferSize As Integer = 0
    Dim cacheEntryInfoBuffer As IntPtr = IntPtr.Zero
    Dim lastError As Integer
    Dim result As Boolean
    Try
        'call with no buffer to see how big the buffer needs to be
        result = GetUrlCacheEntryInfo(fileUrl, IntPtr.Zero, cacheEntryInfoBufferSize)
        'must be read immediately after the native call, before anything can overwrite it
        lastError = Marshal.GetLastWin32Error()
        If Not result Then
            If lastError = ERROR_FILE_NOT_FOUND Then
                'no cache entry for this URL
                Return Nothing
            End If
            If lastError <> ERROR_INSUFFICIENT_BUFFER Then
                'the required size was not reported, so a second call cannot succeed
                Throw New System.ComponentModel.Win32Exception(lastError)
            End If
        End If

        'allocate the necessary amount of memory
        cacheEntryInfoBuffer = Marshal.AllocHGlobal(cacheEntryInfoBufferSize)

        'make call again with properly sized buffer
        result = GetUrlCacheEntryInfo(fileUrl, cacheEntryInfoBuffer, cacheEntryInfoBufferSize)
        If Not result Then
            'report the error of THIS call, not the stale code from the sizing call
            lastError = Marshal.GetLastWin32Error()
            Throw New System.ComponentModel.Win32Exception(lastError)
        End If

        Dim internetCacheEntry As LPINTERNET_CACHE_ENTRY_INFO = _
            DirectCast(Marshal.PtrToStructure(cacheEntryInfoBuffer, GetType(LPINTERNET_CACHE_ENTRY_INFO)), LPINTERNET_CACHE_ENTRY_INFO)

        'the string is copied into a managed String here, before the Finally block frees the buffer
        Return Marshal.PtrToStringAuto(internetCacheEntry.lpszLocalFileName)
    Finally
        'release the unmanaged buffer on every exit path
        If Not cacheEntryInfoBuffer.Equals(IntPtr.Zero) Then
            Marshal.FreeHGlobal(cacheEntryInfoBuffer)
        End If
    End Try
End Function
'Win32 error code compared against Marshal.GetLastWin32Error in GetPathForCachedFile;
'when the first GetUrlCacheEntryInfo call fails with this code the function returns Nothing
Const ERROR_FILE_NOT_FOUND As Integer = 2
''' <summary>
''' P/Invoke declaration for GetUrlCacheEntryInfo in Wininet.dll.
''' </summary>
''' <param name="lpszUrlName">URL of the cache entry to look up.</param>
''' <param name="lpCacheEntryInfo">
''' Unmanaged buffer that receives the cache entry information; IntPtr.Zero may be
''' passed to query the required size.
''' </param>
''' <param name="lpdwCacheEntryInfoBufferSize">
''' On input, the size of the buffer in bytes; on output, the size the entry requires.
''' </param>
''' <returns>True on success; on failure call Marshal.GetLastWin32Error (SetLastError is enabled).</returns>
<DllImport("Wininet.dll", SetLastError:=True, CharSet:=CharSet.Auto)> _
Public Shared Function GetUrlCacheEntryInfo( _
    ByVal lpszUrlName As String, _
    ByVal lpCacheEntryInfo As IntPtr, _
    ByRef lpdwCacheEntryInfoBufferSize As Integer) As Boolean
    'body intentionally empty: the call is forwarded to the native export
End Function
'Sequential-layout structure holding a file time as two 32-bit halves.
'Used for the timestamp fields of LPINTERNET_CACHE_ENTRY_INFO and as the
'input argument of FileTimeToSystemTime.
<StructLayout(LayoutKind.Sequential)> _
Public Structure FILETIME
'low-order 32 bits of the file time
Public dwLowDateTime As Integer
'high-order 32 bits of the file time
Public dwHighDateTime As Integer
End Structure
''' <summary>
''' Managed view of the fixed-size header that GetUrlCacheEntryInfo writes into the
''' caller's buffer. The string members are raw pointers; convert them with
''' Marshal.PtrToString* while the unmanaged buffer is still allocated.
''' </summary>
<StructLayout(LayoutKind.Sequential)> _
Public Structure LPINTERNET_CACHE_ENTRY_INFO
    Public dwStructSize As Integer
    'pointer to the URL string of the entry
    Public lpszSourceUrlName As IntPtr
    'pointer to the local file name string of the entry
    Public lpszLocalFileName As IntPtr
    Public CacheEntryType As Integer
    Public dwUseCount As Integer
    Public dwHitRate As Integer
    'entry size, split into low and high 32-bit halves
    Public dwSizeLow As Integer
    Public dwSizeHigh As Integer
    Public LastModifiedTime As FILETIME
    Public ExpireTime As FILETIME
    Public LastAccessTime As FILETIME
    Public LastSyncTime As FILETIME
    'pointer to the header information and its size
    Public lpHeaderInfo As IntPtr
    Public dwHeaderInfoSize As Integer
    Public lpszFileExtension As IntPtr
    Public dwExemptDelta As Integer
End Structure
''' <summary>
''' Broken-down date and time made of eight 16-bit fields; filled in by
''' FileTimeToSystemTime.
''' </summary>
<StructLayout(LayoutKind.Sequential)> _
Public Structure SYSTEMTIME
    Public wYear As Short
    Public wMonth As Short
    Public wDayOfWeek As Short
    Public wDay As Short
    Public wHour As Short
    Public wMinute As Short
    Public wSecond As Short
    Public wMilliseconds As Short
End Structure
''' <summary>
''' P/Invoke declaration for FileTimeToSystemTime in kernel32.
''' </summary>
''' <param name="lpFileTime">The file time to convert.</param>
''' <param name="lpSystemTime">Receives the converted date and time.</param>
''' <returns>True on success; on failure call Marshal.GetLastWin32Error (SetLastError is enabled).</returns>
<DllImport("kernel32", SetLastError:=True)> _
Public Shared Function FileTimeToSystemTime( _
    ByRef lpFileTime As FILETIME, _
    ByRef lpSystemTime As SYSTEMTIME) As Boolean
    'body intentionally empty: the call is forwarded to the native export
End Function
Remember Me
a@href@title, b, blockquote@cite, em, i, strike, strong, sub, super, u
Page rendered at Tuesday, January 06, 2009 12:29:01 PM (Central Standard Time, UTC-06:00)
Disclaimer: The opinions expressed herein are my own personal opinions and do not represent my employer's view in any way.