'Convert Tags:
Option Public
Option Explicit
Sub Initialize
'** This is a sample agent that demonstrates a method for parsing
'** the HTML tags in a String and converting the undesireable ones
'** to plain HTML text (i.e. -- "<" becomes < and ">" becomes >)
'** version 1.1
'** Julian Robichaux -- http://www.nsftools.com
Dim testString As String
testString = "This is a test of the new
function
. " & Chr(13) & Chr(10) & _
"It should also handle http://linkshttp://links" & _
" blah <> blah hTtP://something" & Chr(0) & Chr(0) & "> < and http://qwerty/blah?asdf (http://blah)."
Print ConvertTags(testString)
End Sub
Function ConvertTags (comment As String) As String
'** This function converts all angle brackets ("<>") in a String to their
'** < and > equivalents, with the exception of a custom subset of
'** tags that are allowed (like or ). The modified String is returned.
Dim lastPos As Integer, startPos As Integer, endPos As Integer
Dim tagString As String
Dim newString As String
lastPos = 1
startPos = Instr(comment, "<")
Do While (startPos > 0)
'** get everything between the last end tag and the current start tag
'** and add it to our newString, replacing any "orphan" > characters
newString = newString & ReplaceSubstring(Mid$(comment, lastPos, startPos - lastPos), ">", ">")
endPos = Instr(startPos, comment, ">")
If (endPos > 0) Then
'** store the text between the < and the > in a variable, for easy access
tagString = Mid$(comment, startPos + 1, endPos - startPos - 1)
Select Case Trim$(Lcase$(tagString))
Case "/a", "b", "/b", "i", "/i", "u", "/u", "p", "br", "pre", "/pre", "blockquote", "/blockquote" :
'** these are the allowable tags. Don't forget to add the closing tag
'** for each opening tag (i.e. -- use "b" and "/b" on your list), and make
'** sure "/a" is on the list if you're allowing tags below
newString = newString & "<" & Trim$(tagString) & ">"
Case Else :
'** if it's not an allowable tag, replace the < and > with
'** < and > (we can check for tags with attributes
'** here too, like )
If (Left$(Trim$(Lcase$(tagString)), 7) = "a href=") Then
'** allow tags -- you may also want to include your own
'** custom routine here to check for "rogue" tags, like
'** ones that contain href="javascript..." or onClick="..." (or you could
'** just disallow tags completely, and just let the end of
'** this routine do the auto-conversion of http:// links for you)
newString = newString & "<" & Trim$(tagString) & ">"
Else
newString = newString & "<" & tagString & ">"
End If
End Select
Else
'** if we have a < without a >, then we've got an "orphan" < character,
'** in which case we can just convert all the remaining < characters to <
newString = newString & ReplaceSubstring(Mid$(comment, startPos), "<", "<")
endPos = Len(comment)
End If
lastPos = endPos + 1
startPos = Instr(lastPos - 1, comment, "<")
Loop
'** convert any "orphan" > characters at the end of the string
If (lastPos <= Len(comment)) Then
newString = newString & ReplaceSubstring(Mid$(comment, lastPos), ">", ">")
End If
'** convert http:// references to links (if they're not inside a tag)
Dim hrefStartPos As Integer, hrefEndPos As Integer
Dim hrefEndChars As String, hrefString As String
hrefEndChars = " " & Chr(0) & Chr(9) & Chr(10) & Chr(13)
hrefStartPos = Instr(1, newString, "http://", 5)
Do While (hrefStartPos > 0)
startPos = Instr(hrefStartPos, newString, "<")
endPos = Instr(hrefStartPos, newString, ">")
If (endPos = 0) Or ((endPos > startPos) And (startPos > 0)) Then
'** if we're not inside a , then convert the link
hrefEndPos = hrefStartPos + 7
'** find the end of the http:// reference
Do While (hrefEndPos <= Len(newString))
If (Instr(hrefEndChars, Mid$(newString, hrefEndPos, 1)) > 0) Then
Exit Do
End If
hrefEndPos = hrefEndPos + 1
Loop
'** make sure that the character at the end of the http:// reference
'** isn't really some punctuation that's probably not part of the URL
'** (these characters aren't strictly illegal, but we're making some
'** educated guesses based on common URL and sentence structure)
Do While (hrefEndPos > hrefStartPos)
If (Instr(".,?!&:-()[]<>{}'""", Mid$(newString, hrefEndPos - 1, 1)) = 0) Then
Exit Do
End If
hrefEndPos = hrefEndPos - 1
Loop
hrefString = Mid$(newString, hrefStartPos, hrefEndPos - hrefStartPos)
newString = Left$(newString, hrefStartPos - 1) & "" & _
hrefString & "" & Mid$(newString, hrefStartPos + Len(hrefString))
hrefEndPos = hrefEndPos + Len("")
Elseif (endPos < startPos) And (endPos > 0) Then
'** if we're inside a tag, assume it's an tag, and skip
'** to the closing tag (so we don't accidentally double-link
'** something like http://blah)
hrefEndPos = Instr(endPos, newString, "", 5)
If (hrefEndPos = 0) Then
hrefEndPos = Len(newString)
End If
Else
hrefEndPos = endPos
End If
hrefStartPos = Instr(hrefEndPos, newString, "http://", 5)
Loop
'** handle linefeeds by replacing double ones with and single ones with
'** (if the resulting String is going to end up in a text field, you might want to use
'** Chr(0) as your linefeed, although you could easily use ReplaceSubstring to
'** convert Chr(13) & Chr(10) to Chr(0) after this function runs)
Dim linefeed As String
linefeed = Chr(13) & Chr(10)
newString = ReplaceSubstring(newString, Chr(13) & Chr(10), Chr(0))
newString = ReplaceSubstring(newString, Chr(13), Chr(0))
newString = ReplaceSubstring(newString, Chr(10), Chr(0))
newString = ReplaceSubstring(newString, Chr(0) & Chr(0), "
" & linefeed)
newString = ReplaceSubstring(newString, Chr(0), "
" & linefeed)
ConvertTags = newString
End Function
Function ReplaceSubstring (Byval fullString As String, oldString As String, newString As String) As String
Dim pos As Integer
pos = Instr(fullString, oldString)
Do While pos > 0
fullString = Left$(fullString, pos - 1) & newString & Mid$(fullString, pos + Len(oldString))
pos = Instr(pos + Len(newString), fullString, oldString)
Loop
ReplaceSubstring = fullString
End Function