/**
 * Filters advertisements out of user-submitted text.
 *
 * <p>Only ASCII letters and digits, CJK ideographs in the range U+4E00..U+9FA5 and the
 * punctuation marks {@code , ; . ?} (ASCII forms, the full-width forms U+FF0C, U+FF1B,
 * U+FF1F and the ideographic full stop U+3002) may be published; every other symbol is
 * stripped.
 *
 * <p>Created by 1 on 2015/8/19.
 */
public class Filterad {

    /** Longest tolerated run of digit-like characters; a longer run is reported. */
    private static final int MAX_DIGITS = 5;

    /** Number of empty tokens after which the accumulated digit count is reset. */
    private static final int MAX_GAPS = 2;

    /** Matches every character that is not allowed to be published. */
    private static final java.util.regex.Pattern NOT_ALLOWED = java.util.regex.Pattern.compile(
            "[^a-zA-Z0-9\u4e00-\u9fa5,;.?\uff0c\uff1b\uff1f\u3002]");

    /*
     * Chinese numerals treated as digits (code points, ascending):
     *   ordinary form, one..ten : 4E00 4E03 4E09 4E5D 4E8C 4E94 516B 516D 5341 56DB
     *   financial form, one..nine: 4F0D 53C1 59F9 634C 67D2 7396 8086 8D30 9646
     * The pattern matches either one of these numerals or any character that is not a
     * letter, a CJK ideograph or permitted punctuation (which includes ASCII digits).
     */
    private static final java.util.regex.Pattern DIGIT_LIKE = java.util.regex.Pattern.compile(
            "[^a-zA-Z\u4e00-\u9fa5,;.?\uff0c\uff1b\uff1f\u3002]"
                    + "|[\u4e00\u4e03\u4e09\u4e5d\u4e8c\u4e94\u4f0d\u516b\u516d\u5341"
                    + "\u53c1\u56db\u59f9\u634c\u67d2\u7396\u8086\u8d30\u9646]");

    /** Matches every character except the {@code #} mask. */
    private static final java.util.regex.Pattern NOT_MASK =
            java.util.regex.Pattern.compile("[^#]");

    /**
     * Demo entry point: prints the cleaned and the masked form of two sample messages.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // U+2489, U+2460 and U+2476 are enclosed-digit symbols, escaped so the source stays ASCII.
        String str = " AFAS141541D1221FS three DF Grand Master \u2489 12314 \u2460\u2476112 2312,;?.,;?. ";
        System.out.println(Clearnotchinese(str));
        System.out.println(Replaceill(Clearnotchinese(str)));

        str = "Non-agricultural silver skirt, 55,193,05 in/Line/finger/guide, verify\" 730 \"";
        System.out.println(Clearnotchinese(str));
        System.out.println(Replaceill(Clearnotchinese(str)));
    }

    /**
     * Removes every character that may not be published.
     *
     * @param buff text to clean; must not be {@code null}
     * @return {@code buff} reduced to ASCII letters and digits, CJK ideographs
     *         (U+4E00..U+9FA5) and the permitted punctuation
     * @throws NullPointerException if {@code buff} is {@code null}
     */
    public static String Clearnotchinese(String buff) {
        return NOT_ALLOWED.matcher(buff).replaceAll("");
    }

    /**
     * Masks digit-like characters and reports suspected QQ-number advertisements.
     *
     * <p>Every digit-like character (ASCII digit, Chinese numeral, or any character outside
     * the letter/ideograph/punctuation set) becomes {@code #}; every other character becomes
     * {@code ,}. The masked string is printed to standard output and then scanned: a
     * {@code #} run longer than five characters, or shorter runs that add up to more than
     * five without being separated by more than two empty tokens, print an advertisement
     * notice to standard output.
     *
     * @param buff text to inspect; must not be {@code null}
     * @return the masked string, consisting only of {@code #} and {@code ,}
     * @throws NullPointerException if {@code buff} is {@code null}
     */
    public static String Replaceill(String buff) {
        String masked = DIGIT_LIKE.matcher(buff).replaceAll("#");
        masked = NOT_MASK.matcher(masked).replaceAll(",");
        System.out.println(masked);

        int digitCount = 0; // digit-like characters accumulated across nearby runs
        int gapCount = 0;   // empty tokens seen since the last reset
        // Splitting on the separator yields the '#' runs; consecutive separators yield
        // empty tokens, which measure how far apart two runs are.
        for (String run : masked.split(",")) {
            if (run.length() > MAX_DIGITS) {
                System.out.println("There is QQ advertisement");
            } else if (run.isEmpty()) {
                gapCount++;
            } else {
                if (gapCount > MAX_GAPS) {
                    // The previous runs are too far away to belong to the same number.
                    digitCount = 0;
                    gapCount = 0;
                }
                digitCount += run.length();
                if (digitCount > MAX_DIGITS) {
                    System.out.println(" There is QQ advertisement ");
                    gapCount = 0;
                }
            }
        }
        return masked;
    }
}
From WizNote (Wiz)
Filter ads: only [A-Za-z0-9], Chinese characters and the punctuation marks , ; ? . may be published; all other special symbols are removed.