I'm creating a database that will pull some data from a website and then do the necessary manipulations to it. The BE of the database is SQL Server with an Access FE. I have gotten as far as being able to pull the source code from the website and then extract just the HTML table. I'm now stuck with the HTML table code stored in a string variable and I don't know how to proceed from here. I could use the InStr(), and Mid() functions to get the values out of the table and then write them to a table, but I was hoping that I could just import the data so that I wouldn't have to write as much code that gets difficult to figure out because of nested InStr() and Mid() functions.
8 7848 Rabbit 12,516
Recognized Expert Moderator MVP
Rather than a bunch of mid and instr methods, I would use split or regular expressions. I don't think there's a native function to inport html data.
ADezii 8,834
Recognized Expert Expert
How about Posting a sample of the Data contained in your String Variable.
I wrote this code several years ago to down load baseball stats from espn. It reads one character at a time.
saves the output into a comma seperated file.
You can tweak it to your needs.
Have a reference to microsoft scripting runtime for file system object. - Option Compare Database
-
Option Explicit
-
-
Enum RdProcess
-
tableout
-
tablein
-
tagin
-
tagout
-
fieldin
-
fieldout
-
rowskip
-
Readstart
-
Readend
-
End Enum
-
-
-
Private Declare Function URLDownloadToFile Lib "urlmon" Alias _
-
"URLDownloadToFileA" (ByVal pCaller As Long, ByVal szURL As String, ByVal _
-
szFileName As String, ByVal dwReserved As Long, ByVal lpfnCB As Long) As Long
-
-
Public Function DownloadFilefromWeb(Url As String, strSavePath As String) As Boolean
-
Dim ret As Long
-
ret = URLDownloadToFile(0, Url, strSavePath, 0, 0)
-
If ret = 0 Then
-
DownloadFilefromWeb = True
-
Else
-
DownloadFilefromWeb = False
-
End If
-
End Function
-
-
Public Sub dofiles()
-
Dim fso As FileSystemObject, tso As TextStream, tsoi As TextStream, i As Integer, spath As String, Url As String
-
Dim strSavePath As String
-
spath = "E:\strato\"
-
Set fso = CreateObject("Scripting.FileSystemObject")
-
-
Set tso = fso.OpenTextFile(spath & "newpit.txt", ForWriting, True, 0)
-
Url = "http://espn.go.com/mlb/stats/pitching/_/qualified/false/order/false"
-
strSavePath = spath & "temp_pit_file.txt"
-
i = 1
-
Do While DownloadFilefromWeb(Url, strSavePath)
-
Set tsoi = fso.OpenTextFile(strSavePath, ForReading, False, 0)
-
If get_stats_table(tso, tsoi) Then
-
tsoi.Close
-
Exit Do
-
End If
-
tsoi.Close
-
Url = "http://espn.go.com/mlb/stats/pitching/_/count/" & i & "/qualified/false/order/false"
-
i = i + 40 'forty players per page
-
Loop
-
tso.Close
-
-
Set tso = fso.OpenTextFile(spath & "newbat.txt", ForWriting, True, 0)
-
Url = "http://espn.go.com/mlb/stats/batting/_/qualified/false"
-
strSavePath = spath & "temp_bat_file.txt"
-
i = 1
-
Do While DownloadFilefromWeb(Url, strSavePath)
-
Set tsoi = fso.OpenTextFile(strSavePath, ForReading, False, 0)
-
If get_stats_table(tso, tsoi) Then
-
tsoi.Close
-
Exit Do
-
End If
-
tsoi.Close
-
Url = "http://espn.go.com/mlb/stats/batting/_/count/" & i & "/qualified/false"
-
i = i + 40
-
Loop
-
-
tso.Close
-
End Sub
-
Public Function get_stats_table(tso As TextStream, tsoi As TextStream) As Boolean
-
Dim i As Integer, s As String
-
Dim tagVal As String, fval As String
-
Dim stat As RdProcess, FoundTable As Boolean, readtable As Boolean, decount As Integer
-
-
FoundTable = False
-
stat = tableout
-
-
Do
-
s = tsoi.Read(1)
-
Select Case stat
-
Case tableout
-
tagVal = Right(tagVal, 5) + s
-
If tagVal = "<Table" Then stat = tablein
-
Case tablein
-
If s = "<" Then ' start reading a tag else ignor the stuff
-
stat = tagin
-
tagVal = "<"
-
End If
-
Case tagin
-
tagVal = tagVal + s
-
If s = ">" Then
-
stat = Handle_the_Tag(tagVal, fval, tso, FoundTable, decount)
-
tagVal = ""
-
If stat = Readend Then
-
Exit Do
-
End If
-
End If
-
Case tagout ' decifer the tag
-
If s = "<" Then
-
stat = tagin
-
tagVal = s
-
End If
-
Case fieldin
-
If s = "<" Then
-
tagVal = "<"
-
stat = tagin
-
Else
-
fval = fval + s
-
End If
-
Case rowskip
-
tagVal = Right(tagVal, 4) + s
-
If tagVal = "</tr>" Then stat = tagout
-
Case Readend
-
Exit Do
-
End Select
-
Loop Until tsoi.AtEndOfStream
-
' tso.Write (s)
-
get_stats_table = CBool(decount < 40)
-
End Function
-
'<table </table>
-
'<td </td> <td align="left">WAS</td>
-
'<tr </tr>
-
Function Handle_the_Tag(tagVal As String, fval As String, tso As TextStream, FoundTable As Boolean, decount As Integer) As RdProcess
-
Dim p As Integer
-
If tagVal Like "</td>" Then 'write comma space
-
If FoundTable Then
-
tso.Write fval & ", "
-
Else
-
If fval = "Sortable Batting" Then
-
FoundTable = True
-
End If
-
If fval = "Sortable Pitching" Then
-
FoundTable = True
-
End If
-
End If
-
fval = ""
-
Handle_the_Tag = tagout
-
ElseIf tagVal Like "<td*>" Then
-
fval = ""
-
Handle_the_Tag = fieldin
-
ElseIf tagVal = "</table>" Then
-
If FoundTable Then
-
Handle_the_Tag = Readend 'done reading file
-
Else
-
Handle_the_Tag = tableout
-
End If
-
ElseIf tagVal = "</tr>" Then 'write newline
-
If FoundTable Then tso.Write vbCrLf
-
Handle_the_Tag = tagout
-
ElseIf tagVal Like "<tr class=""colhead""*>" Then
-
Handle_the_Tag = rowskip
-
ElseIf tagVal Like "<a href=*" Then 'get Id and write id comma space
-
If FoundTable Then
-
p = InStrRev(tagVal, "id/") '<a href="/mlb/players/profile?playerId=4949"> Willie Harris</a>
-
'<a href="http://espn.go.com/mlb/player/_/id/6261/scott-baker"> Scott Baker
-
fval = "" & val(Mid(tagVal, p + 3))
-
decount = decount + 1
-
p = InStrRev(tagVal, "/")
-
fval = fval & ", " & Mid(tagVal, p + 1)
-
fval = Replace(fval, """>", "")
-
-
tso.Write fval & ", "
-
fval = ""
-
Handle_the_Tag = fieldin
-
Else
-
Handle_the_Tag = tagout
-
End If
-
Else
-
Handle_the_Tag = fieldin
-
End If
-
End Function
-
-
- <table class="table1" cellpadding="0" cellspacing="0" width="185">
-
<colgroup class="stdColgroup">
-
<col span="1" class="hdrCol"><col span="4" class="stdCol">
-
</colgroup>
-
<thead>
-
<tr><th colspan="5"><a href="/pmms/" target="_top">Freddie Mac's Primary Mortgage Market Survey</a>®</th></tr>
-
<tr class="theadSub"><td> </td><td>30YR FRM</td><td>15YR FRM</td><td>5YR ARM</td><td>1YR ARM</td></tr>
-
</thead>
-
-
<tfoot>
-
<tr><td colspan="5">Copyright 2013, <a href="http://www.freddiemac.com/" target="_top">Freddie Mac</a>.
-
Averages are for conforming mortgages with 20% down.</td></tr>
-
</tfoot>
-
<tr><th>Avg.</th><td>4.32 </td><td>3.37 </td>
-
<td>3.07 </td><td>2.63 </td></tr>
-
<tr><th>Fees & Points</th><td>0.7 </td><td>0.7 </td>
-
<td>0.5 </td><td>0.4 </td></tr>
-
-
</table>
-
That is the HTML Table code. I know that I can import an HTML document using the import wizard in Access 2010, so I suppose that I could save the text as a file and then import the file, but I don't know how to do those steps either.
I will have to look at the Split and Regular functions to see how I can use them as I'm not familiar with those.
ADezii 8,834
Recognized Expert Expert
Within a couple of minutes I was able to extract most of the Primary Data as such: - Dim strHTML As String
-
Dim varSplit As Variant
-
Dim intCtr As Integer
-
-
strHTML = "<table class='table1' cellpadding='0' cellspacing='0' width='185'>" & _
-
"<colgroup class='stdColgroup'>" & _
-
"<col span='1' class='hdrCol'><col span='4' class='stdCol'>" & _
-
"</colgroup>" & _
-
"<thead>" & _
-
"<tr><th colspan='5'><a href='/pmms/' target='_top'>Freddie Mac's Primary Mortgage Market Survey</a>®</th></tr>" & _
-
"<tr class='theadSub'><td> </td><td>30YR FRM</td><td>15YR FRM</td><td>5YR ARM</td><td>1YR ARM</td></tr>" & _
-
"</thead>" & _
-
"<tfoot>" & _
-
"<tr><td colspan='5'>Copyright 2013, <a href='http://www.freddiemac.com/' target='_top'>Freddie Mac</a>." & _
-
"Averages are for conforming mortgages with 20% down.</td></tr>" & _
-
"</tfoot>" & _
-
"<tr><th>Avg.</th><td>4.32 </td><td>3.37 </td>" & _
-
"<td>3.07 </td><td>2.63 </td></tr>" & _
-
"<tr><th>Fees & Points</th><td>0.7 </td><td>0.7 </td>" & _
-
"<td>0.5 </td><td>0.4 </td></tr>" & _
-
"</table>"
-
-
varSplit = Split(strHTML, "<td>")
-
-
For intCtr = LBound(varSplit) To UBound(varSplit)
-
Debug.Print Split(varSplit(intCtr), " ")(0)
-
Next
OUTPUT: - <table
-
</td>
-
30YR
-
15YR
-
5YR
-
1YR
-
4.32
-
3.37
-
3.07
-
2.63
-
0.7
-
0.7
-
0.5
-
0.4
I appreciate all the help. Unfortunately, I have been pulled into another task right now, but hopefully I will be able to test all of your solutions in the next few days. I will post back the results and if I need any help. Thanks again.
I have finally gotten back to this. Thanks ADezii. It worked perfectly and it is extremely easy to implement.
ADezii 8,834
Recognized Expert Expert
You are quite weldcome, Seth.
Sign in to post your reply or Sign up for a free account.
Similar topics |
by: John Kolvereid |
last post by:
How does one import data INTO a table. I am forced to read files (\i)
w/ a ton of insert statements. I am trying to import approx 800 lines
into the table. However, I must bundle them into a series of files
because of the bug in PGSQL (the buffer overflows). Interestingly,
the literature claims that the one feature of PostgreSQL is SPEED....
|
by: William Fields |
last post by:
Hello,
I've used FrontPage to build some simple websites and am interested in what
Visual Studio and ASP.NET can give me.
Is there a way to import a website into a Visual Studio Web project like you
can with FrontPage?
Thanks.
|
by: Schultz |
last post by:
I would like to know if it is possible to import data from MS Excel
2000 to a dataset using asp.net. The excel file would need to be
uploaded to the server from a web page first, before the loaded into a
dataset. After the data is stored in the dataset, i would need to write
the information to an access db. If it is possible to do this, where...
|
by: Deep |
last post by:
i whould u like to import data from sql server database into excel
sheet using vb.net
|
by: barrybevel |
last post by:
Hi,
I'm trying to login to the www.vodafone.ie website using
HttpWebRequest.
It works fine with IE/Firefox and the .NET Web Control too, just not
with my code.
I think it's a redirect 302 problem.
I'm using this code in a ASP.NET 2.0 application just in case that
matters,
maybe someone knows a better way to do this?
| |
by: Nick Craig-Wood |
last post by:
I've been dumping a database in a python code format (for use with
Python on S60 mobile phone actually) and I've noticed that it uses
absolutely tons of memory as compared to how much the data structure
actually needs once it is loaded in memory.
The programs below create a file (z.py) with a data structure in which
looks like this
--...
|
by: crs27 |
last post by:
Hai All,
I want to import data from Oracle 10g to POstgreSQl 8.2.Would like to know if their is any way to import data from pgAdmin III.
Thought of using Copy command,but for this the files needs to be either in Ascii or Binary formate.
Kindly suggest me any means of importing data from oracle to postgres.
Thank You
|
by: nuray |
last post by:
I have a spreadsheet where I run a query in Access and copy it into the specific spreadsheets. I was trying to use Data->import external data->import data tool in excel to get the data automaticly w/o me going into Access and running the query and pasting into the spreadsheet. However, I can go to the database using this tool in excel, but I can...
|
by: a |
last post by:
Dear friends
I want import data from CSV file to mdb file How can I do that in vb.net?
|
by: cindy7 |
last post by:
how can i read excel data and import into sql using cfm???
I have a CFM form for user to import excel file into database and save file in the folder at server location.
I would like to read that excel file and import data into sql database each time the user upload the file and create new record.
Any idea how to solve this problem, every...
|
by: Hystou |
last post by:
Most computers default to English, but sometimes we require a different language, especially when relocating. Forgot to request a specific language before your computer shipped? No problem! You can effortlessly switch the default language on Windows 10 without reinstalling. I'll walk you through it.
First, let's disable language...
| |
by: jinu1996 |
last post by:
In today's digital age, having a compelling online presence is paramount for businesses aiming to thrive in a competitive landscape. At the heart of this digital strategy lies an intricately woven tapestry of website design and digital marketing. It's not merely about having a website; it's about crafting an immersive digital experience that...
|
by: Hystou |
last post by:
Overview:
Windows 11 and 10 have less user interface control over operating system update behaviour than previous versions of Windows. In Windows 11 and 10, there is no way to turn off the Windows Update option using the Control Panel or Settings app; it automatically checks for updates and installs any it finds, whether you like it or not. For...
|
by: tracyyun |
last post by:
Dear forum friends,
With the development of smart home technology, a variety of wireless communication protocols have appeared on the market, such as Zigbee, Z-Wave, Wi-Fi, Bluetooth, etc. Each protocol has its own unique characteristics and advantages, but as a user who is planning to build a smart home system, I am a bit confused by the...
|
by: agi2029 |
last post by:
Let's talk about the concept of autonomous AI software engineers and no-code agents. These AIs are designed to manage the entire lifecycle of a software development project—planning, coding, testing, and deployment—without human intervention. Imagine an AI that can take a project description, break it down, write the code, debug it, and then...
|
by: isladogs |
last post by:
The next Access Europe User Group meeting will be on Wednesday 1 May 2024 starting at 18:00 UK time (6PM UTC+1) and finishing by 19:30 (7.30PM).
In this session, we are pleased to welcome a new presenter, Adolph Dupré who will be discussing some powerful techniques for using class modules.
He will explain when you may want to use classes...
|
by: TSSRALBI |
last post by:
Hello
I'm a network technician in training and I need your help.
I am currently learning how to create and manage the different types of VPNs and I have a question about LAN-to-LAN VPNs.
The last exercise I practiced was to create a LAN-to-LAN VPN between two Pfsense firewalls, by using IPSEC protocols.
I succeeded, with both firewalls in...
| |
by: adsilva |
last post by:
A Windows Forms form does not have the event Unload, like VB6. What one acts like?
|
by: bsmnconsultancy |
last post by:
In today's digital era, a well-designed website is crucial for businesses looking to succeed. Whether you're a small business owner or a large corporation in Toronto, having a strong online presence can significantly impact your brand's success. BSMN Consultancy, a leader in Website Development in Toronto offers valuable insights into creating...
| |