// 先记下来,以作备用! (Noted here for future reference.)
/// <summary>
/// Removes HTML markup from a string and returns the remaining text, HTML-encoded.
/// </summary>
/// <remarks>
/// Steps, in order: script elements (including their content) are removed, then all
/// other tags, then line breaks with the whitespace that follows them, then comment
/// leftovers. Known character entities are decoded and other numeric character
/// references are dropped. Any angle brackets still present are removed, and the
/// result is HTML-encoded and trimmed.
/// NOTE(review): the final encoding step goes through <c>HttpContext.Current</c>;
/// presumably this requires an active ASP.NET request - confirm for non-request callers.
/// </remarks>
/// <param name="Htmlstring">Source text that may contain HTML.</param>
/// <returns>
/// The trimmed, HTML-encoded text with markup removed; an empty string when
/// <paramref name="Htmlstring"/> is null or empty.
/// </returns>
public static string GetNoHTMLString(string Htmlstring)
{
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return string.Empty;
    }

    // Remove script elements together with their content. Singleline lets ".*?"
    // cross line breaks so multi-line scripts are removed as a whole.
    Htmlstring = Regex.Replace(
        Htmlstring,
        @"<script[^>]*?>.*?</script\s*>",
        "",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Remove every remaining tag.
    Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "");

    // Remove line breaks together with the whitespace that follows them.
    Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "");

    // Remove what is left of HTML comments that the tag pattern did not consume.
    Htmlstring = Regex.Replace(Htmlstring, @"-->", "");
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "");

    // Decode the supported entities in a single pass. Doing it in one pass (instead of
    // one Replace per entity) prevents double decoding such as "&amp;lt;" -> "&lt;" -> "<".
    Htmlstring = Regex.Replace(
        Htmlstring,
        @"&(quot|amp|lt|gt|nbsp|iexcl|cent|pound|copy|#\d+);",
        delegate(Match entity)
        {
            switch (entity.Groups[1].Value.ToLowerInvariant())
            {
                case "quot":
                case "#34":
                    return "\"";
                case "amp":
                case "#38":
                    return "&";
                case "lt":
                case "#60":
                    return "<";
                case "gt":
                case "#62":
                    return ">";
                case "nbsp":
                case "#160":
                    return " ";
                case "iexcl":
                case "#161":
                    return "\u00A1";
                case "cent":
                case "#162":
                    return "\u00A2";
                case "pound":
                case "#163":
                    return "\u00A3";
                case "copy":
                case "#169":
                    return "\u00A9";
                default:
                    // Any other numeric character reference is dropped.
                    return string.Empty;
            }
        },
        RegexOptions.IgnoreCase);

    // string.Replace returns a new string; the result has to be assigned back,
    // otherwise these removals have no effect.
    Htmlstring = Htmlstring.Replace("<", "");
    Htmlstring = Htmlstring.Replace(">", "");
    Htmlstring = Htmlstring.Replace("\r\n", "");

    Htmlstring = HttpContext.Current.Server.HtmlEncode(Htmlstring).Trim();
    return Htmlstring;
}
/// <summary>
/// Returns the input with a fixed set of dangerous HTML elements removed
/// (for example <c>script</c> and <c>iframe</c>); all other markup is kept for display.
/// </summary>
/// <remarks>
/// For each blocked element, the paired form is removed together with its content, and
/// any unpaired opening or closing tag of that element is removed as well. The passes
/// repeat until the text no longer changes, so fragments cannot join into a blocked
/// tag after a removal. Attributes of elements that are kept are not inspected.
/// </remarks>
/// <param name="str">Unprocessed string that may contain HTML.</param>
/// <returns>
/// The filtered string; an empty string when <paramref name="str"/> is null or empty.
/// </returns>
public static string GetSafeHTMLString(string str)
{
    if (string.IsNullOrEmpty(str))
    {
        return string.Empty;
    }

    string[] blockedTags =
    {
        "applet", "body", "embed", "frame", "script", "frameset", "html",
        "iframe", "style", "layer", "link", "ilayer", "meta", "object"
    };

    // Singleline lets ".*?" cross line breaks so multi-line elements are matched.
    const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Singleline;

    string previous;
    do
    {
        previous = str;
        foreach (string tag in blockedTags)
        {
            // Paired form: opening tag, content, closing tag. "\b" keeps "frame"
            // from matching "frameset" and "layer" from matching longer names.
            str = Regex.Replace(
                str,
                "<" + tag + @"\b[^>]*>.*?</" + tag + @"\b[^>]*>",
                "",
                options);

            // Unpaired leftovers: void elements such as <meta ...> or <link ...>,
            // unclosed opening tags, and orphaned closing tags.
            str = Regex.Replace(str, "</?" + tag + @"\b[^>]*>", "", options);
        }
    }
    while (str != previous); // every replacement shortens the text, so this terminates

    return str;
}
// NOTE(review): this file contains another definition of GetNoHTMLString with the same
// signature; keep only one of them when both end up in the same type.
/// <summary>
/// Removes HTML markup from a string and returns the remaining text, HTML-encoded.
/// </summary>
/// <remarks>
/// Steps, in order: script elements (including their content) are removed, then all
/// other tags, then line breaks with the whitespace that follows them, then comment
/// leftovers. Known character entities are decoded and other numeric character
/// references are dropped. Any angle brackets still present are removed, and the
/// result is HTML-encoded and trimmed.
/// NOTE(review): the final encoding step goes through <c>HttpContext.Current</c>;
/// presumably this requires an active ASP.NET request - confirm for non-request callers.
/// </remarks>
/// <param name="Htmlstring">Source text that may contain HTML.</param>
/// <returns>
/// The trimmed, HTML-encoded text with markup removed; an empty string when
/// <paramref name="Htmlstring"/> is null or empty.
/// </returns>
public static string GetNoHTMLString(string Htmlstring)
{
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return string.Empty;
    }

    // Remove script elements together with their content. Singleline lets ".*?"
    // cross line breaks so multi-line scripts are removed as a whole.
    Htmlstring = Regex.Replace(
        Htmlstring,
        @"<script[^>]*?>.*?</script\s*>",
        "",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Remove every remaining tag.
    Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "");

    // Remove line breaks together with the whitespace that follows them.
    Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "");

    // Remove what is left of HTML comments that the tag pattern did not consume.
    Htmlstring = Regex.Replace(Htmlstring, @"-->", "");
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "");

    // Decode the supported entities in a single pass. Doing it in one pass (instead of
    // one Replace per entity) prevents double decoding such as "&amp;lt;" -> "&lt;" -> "<".
    Htmlstring = Regex.Replace(
        Htmlstring,
        @"&(quot|amp|lt|gt|nbsp|iexcl|cent|pound|copy|#\d+);",
        delegate(Match entity)
        {
            switch (entity.Groups[1].Value.ToLowerInvariant())
            {
                case "quot":
                case "#34":
                    return "\"";
                case "amp":
                case "#38":
                    return "&";
                case "lt":
                case "#60":
                    return "<";
                case "gt":
                case "#62":
                    return ">";
                case "nbsp":
                case "#160":
                    return " ";
                case "iexcl":
                case "#161":
                    return "\u00A1";
                case "cent":
                case "#162":
                    return "\u00A2";
                case "pound":
                case "#163":
                    return "\u00A3";
                case "copy":
                case "#169":
                    return "\u00A9";
                default:
                    // Any other numeric character reference is dropped.
                    return string.Empty;
            }
        },
        RegexOptions.IgnoreCase);

    // string.Replace returns a new string; the result has to be assigned back,
    // otherwise these removals have no effect.
    Htmlstring = Htmlstring.Replace("<", "");
    Htmlstring = Htmlstring.Replace(">", "");
    Htmlstring = Htmlstring.Replace("\r\n", "");

    Htmlstring = HttpContext.Current.Server.HtmlEncode(Htmlstring).Trim();
    return Htmlstring;
}
// NOTE(review): this file contains another definition of GetSafeHTMLString with the same
// signature; keep only one of them when both end up in the same type.
/// <summary>
/// Returns the input with a fixed set of dangerous HTML elements removed
/// (for example <c>script</c> and <c>iframe</c>); all other markup is kept for display.
/// </summary>
/// <remarks>
/// For each blocked element, the paired form is removed together with its content, and
/// any unpaired opening or closing tag of that element is removed as well. The passes
/// repeat until the text no longer changes, so fragments cannot join into a blocked
/// tag after a removal. Attributes of elements that are kept are not inspected.
/// </remarks>
/// <param name="str">Unprocessed string that may contain HTML.</param>
/// <returns>
/// The filtered string; an empty string when <paramref name="str"/> is null or empty.
/// </returns>
public static string GetSafeHTMLString(string str)
{
    if (string.IsNullOrEmpty(str))
    {
        return string.Empty;
    }

    string[] blockedTags =
    {
        "applet", "body", "embed", "frame", "script", "frameset", "html",
        "iframe", "style", "layer", "link", "ilayer", "meta", "object"
    };

    // Singleline lets ".*?" cross line breaks so multi-line elements are matched.
    const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Singleline;

    string previous;
    do
    {
        previous = str;
        foreach (string tag in blockedTags)
        {
            // Paired form: opening tag, content, closing tag. "\b" keeps "frame"
            // from matching "frameset" and "layer" from matching longer names.
            str = Regex.Replace(
                str,
                "<" + tag + @"\b[^>]*>.*?</" + tag + @"\b[^>]*>",
                "",
                options);

            // Unpaired leftovers: void elements such as <meta ...> or <link ...>,
            // unclosed opening tags, and orphaned closing tags.
            str = Regex.Replace(str, "</?" + tag + @"\b[^>]*>", "", options);
        }
    }
    while (str != previous); // every replacement shortens the text, so this terminates

    return str;
}