Thanks so much, gang!
I added this this afternoon, and it caught most of the problematic characters. Thanks especially to Jenna Huntsman for handling the initial heavy lifting!
The only thing I added was handling for XML characters mentioned here: https://en.wikipedia.org/wiki/Character_encodings_in_HTML#XML_character_references
It seems like all the characters that were being encoded are ones that might have an impact on HTML/XML.
Other than that, 𝓾𝓷𝓬𝓸𝓶𝓶𝓸𝓷 𝓬𝓱𝓪𝓻𝓪𝓬𝓽𝓮𝓻𝓼 𝓲𝓷 𝓹𝓻𝓸𝓯𝓲𝓵𝓮𝓼 𝓬𝓸𝓶𝓮 𝓽𝓱𝓻𝓸𝓾𝓰𝓱 𝓳𝓾𝓼𝓽 𝓯𝓲𝓷𝓮 ♥●•٠·˙
There's probably a better way to structure them, but here are my edits anyhow:
string HTMLreplaceCodedChars(string s_rawHTML)
{
    //Replace coded HTML/XML characters with Unicode equivalents. Credit: Jenna Huntsman, KT Kingsley, Quistess Alpha, Haravikk Mistral, AndreRush
    //Earlier revisions used the replace idiom from http://wiki.secondlife.com/wiki/Combined_Library#Replace
    //
    //s_rawHTML: text that may contain character references.
    //Returns:   the same text with every recognised reference decoded exactly once.
    //
    //Recognised references:
    //  - decimal numeric: &#39;  &#039;  &#8217;   (any number of digits)
    //  - hex numeric:     &#x27; &#X2019;
    //  - the five predefined XML entities: &quot; &amp; &lt; &gt; &apos;
    //Anything else that starts with '&' (unknown name, missing ';', code point 0) is copied through unchanged.
    //
    //The string is scanned once, left to right, and decoded output is never rescanned.
    //That avoids double-decoding: "&amp;lt;" becomes the text "&lt;", not "<".
    string s_out = "";
    integer i_amp = llSubStringIndex(s_rawHTML, "&");
    while (i_amp != -1)
    {
        //Move the plain text in front of the ampersand to the output.
        //Guarded because an end index of -1 would mean "to the end of the string".
        if (i_amp > 0)
        {
            s_out += llGetSubString(s_rawHTML, 0, i_amp - 1);
            s_rawHTML = llDeleteSubString(s_rawHTML, 0, i_amp - 1);
        }

        //s_rawHTML now starts with "&". The longest reference we accept ("&#x10FFFF;") is
        //10 characters, so only that window is searched for the terminating ';'.
        string s_head = llGetSubString(s_rawHTML, 0, 9);
        integer i_semi = llSubStringIndex(s_head, ";");
        string s_char = ""; //Stays empty when the reference is not recognised.

        if (i_semi > 1) //Need at least one character between '&' and ';'.
        {
            string s_name = llGetSubString(s_head, 1, i_semi - 1);
            if (s_name == "quot") s_char = "\"";
            else if (s_name == "amp") s_char = "&";
            else if (s_name == "lt") s_char = "<";
            else if (s_name == "gt") s_char = ">";
            else if (s_name == "apos") s_char = "'";
            else if (llGetSubString(s_name, 0, 0) == "#")
            {
                integer i_charID = 0;
                integer i_len = llStringLength(s_name);
                if (i_len > 1)
                {
                    if (llToLower(llGetSubString(s_name, 1, 1)) == "x")
                    {
                        //Hex form: the integer typecast understands a "0x" prefix.
                        if (i_len > 2) i_charID = (integer)("0x" + llGetSubString(s_name, 2, -1));
                    }
                    else
                    {
                        //Decimal form; leading zeros ("&#039;") parse fine.
                        i_charID = (integer)llGetSubString(s_name, 1, -1);
                    }
                }
                if (i_charID > 0) s_char = llChar(i_charID);
            }
        }

        if (s_char != "")
        {
            //Recognised: emit the decoded character and drop the whole "&...;" reference.
            s_out += s_char;
            s_rawHTML = llDeleteSubString(s_rawHTML, 0, i_semi);
        }
        else
        {
            //Not a reference we know: keep the '&' literally and continue after it.
            s_out += "&";
            s_rawHTML = llDeleteSubString(s_rawHTML, 0, 0);
        }

        i_amp = llSubStringIndex(s_rawHTML, "&");
    }
    return s_out + s_rawHTML; //pass the new string back
}