User:AmiDaniel/VP/RSS source
Appearance
< User:AmiDaniel | VP
The following is the method being used in VandalProof version 1.3 to retrieve items from the recent changes RSS feed, and as I've had many requests for it, I decided to copy it here. This method will work for an RSS RC feed on any foundation Wiki regardless of language, etc.
To use it, you will need to copy the following code into a module. Then you will need to retrieve the innerHTML of the RSS feed you wish to scrape (on en.wikipedia, it can be found at http://en.wikipedia.org/w/index.php?title=Special:Recentchanges&feed=rss). Then pass the innerHTML to SplitItems (like so: SplitItems WB_RSS.Document.body.innerHTML). That will then populate the RSSItems variable with every RC item it finds in the feed.
Option Explicit Public Type RSSItem BodyContent As String sUser As String sArticleName As String sPageAddress As String sSummary As String sAdded As String sRemoved As String sMatches As String sNewTime As String sOldTime As String End Type Public RSSItems() As RSSItem Public Sub SplitItems(ByVal str$) Dim i% On Error Resume Next i = UBound(RSSItems) If Err Then Err.Clear ReDim RSSItems(0) End If On Error GoTo 0 Do Until InStr(1, LCase(str), "<item>") = 0 ReDim Preserve RSSItems(i) With RSSItems(i) .BodyContent = Left(str, InStr(1, LCase(str), "<item>") - 1) .BodyContent = FindAndReplace(.BodyContent, """/w", """" & GlVars.Root & "/w") .sArticleName = BetwixtStr(.BodyContent, "<title>", "</title>") .sPageAddress = BetwixtStr(.BodyContent, "<link>", "</link>") .sUser = BetwixtStr(.BodyContent, "<dc:creator>", "</dc:creator>") .sNewTime = BetwixtStr(.BodyContent, "<pubDate>", "</pubdate>") .sSummary = BetwixtStr(.BodyContent, "<p>", "</p>") .sSummary = FindAndReplace(.sSummary, "<span class=autocomment>", "/*") .sSummary = FindAndReplace(.sSummary, "</span>", "*/") .sAdded = GetAdded(.BodyContent) .sRemoved = GetRemoved(.BodyContent) .BodyContent = FindAndReplace(.BodyContent, "<link>" & .sPageAddress & "</link>", "<H2><A href=""" & .sPageAddress & """>" & .sArticleName & "</A> (<A href=""" & _ .sPageAddress & "?diff=cur"">last diff</A>) (<A href=""" & GlVars.Root & "/w/index.php?title=" & Trim(StrtoHTML(.sArticleName)) & "&action=history"">hist</A>)</H2>") If .sArticleName = "" & GlVars.SpecialText & "Log/newusers" Then .BodyContent = .BodyContent & Chr(13) & Chr(10) & Chr(13) & Chr(10) & "<H3><A href=""$bcur:indef-unsername"">Block Username</A></H3>" End If End With str = Right(str, Len(str) - InStr(1, LCase(str), "<item>") - Len("<item>") + 1) 'If InStr(1, lastinstr, LCase(str), "<item>") > 0 Then lastinstr = InStr(1, lastinstr, LCase(str), "<item>") i = i + 1 Loop End Sub Public Function BetwixtStr$(ByVal sIn$, ByVal sFirst$, ByVal sLast$) If InStr(sIn, sLast) Then BetwixtStr = Left(sIn, InStrRev(sIn, sLast) - 1) If InStr(BetwixtStr, sFirst) Then BetwixtStr = Right(BetwixtStr, Len(BetwixtStr) - InStr(BetwixtStr, sFirst) - Len(sFirst) + 1) End If End If End Function Public Function GetAdded$(ByVal sIn$) Dim fields Debug.Print Debug.Print sIn If InStr(1, sIn, "<p><b>New page</b></p>") Then GetAdded = "##NEWPAGE##" Do Until InStr(1, UCase(sIn), "<TD STYLE=""FONT-SIZE: SMALLER; BACKGROUND: #CFC"">") = 0 sIn = Right(sIn, Len(sIn) - InStr(1, UCase(sIn), "<TD STYLE=""FONT-SIZE: SMALLER; BACKGROUND: #CFC"">") - Len("<TD STYLE=""FONT-SIZE: SMALLER; BACKGROUND: #CFC"">") + 1) GetAdded = GetAdded & sIn GetAdded = Left(GetAdded, InStr(1, LCase(GetAdded), "</td>") - 1) Loop GetAdded = FindAndReplace(GetAdded, "<span style=""FONT-WEIGHT: bold; COLOR: red"">", "") GetAdded = FindAndReplace(GetAdded, "</span>", "") GetAdded = FindAndReplace(GetAdded, "</sup>", "") End Function Public Function GetRemoved$(ByVal sIn$) Dim fields 'If InStr(1, sIn, "<p><b>New page</b></p>") Then GetRemoved = "##NEWPAGE##" Do Until InStr(1, UCase(sIn), UCase("<td style=""FONT-SIZE: smaller; BACKGROUND: #ffa"">")) = 0 sIn = Right(sIn, Len(sIn) - InStr(1, UCase(sIn), UCase("<td style=""FONT-SIZE: smaller; BACKGROUND: #ffa"">")) - Len("<td style=""FONT-SIZE: smaller; BACKGROUND: #ffa"">") + 1) GetRemoved = GetRemoved & sIn GetRemoved = Left(GetRemoved, InStr(1, LCase(GetRemoved), "</td>") - 1) Loop GetRemoved = FindAndReplace(GetRemoved, "<span style=""FONT-WEIGHT: bold; COLOR: red"">", "") GetRemoved = FindAndReplace(GetRemoved, "</span>", "") GetRemoved = FindAndReplace(GetRemoved, "</sup>", "") End Function Function FindAndReplace(ByVal strIn$, ByVal strFind$, ByVal strReplace$) Dim lastInstr%, lastInstr_New% lastInstr = 1 Do Until InStr(lastInstr, strIn, strFind) = 0 lastInstr_New = InStr(lastInstr, strIn, strFind) strIn = Left(strIn, InStr(lastInstr, strIn, strFind) - 1) & strReplace & Right(strIn, Len(strIn) - InStr(lastInstr, strIn, strFind) - Len(strFind) + 1) lastInstr = lastInstr_New Loop FindAndReplace = strIn End Function