package util;
/**
 * Counts the words or the characters of an article, optionally leaving
 * punctuation symbols and/or spaces out of the total.
 *
 * <p>Non-ASCII text in this class is written with Unicode escapes so that the
 * source survives encoding conversions.
 *
 * @author ffshi
 */
public class StatWordCount {
    /**
     * Punctuation marks treated as "symbols": {@code , ; . ! ? +}, the
     * ideographic full stop (U+3002) and the full-width semicolon, question
     * mark and exclamation mark (U+FF1B, U+FF1F, U+FF01).
     *
     * <p>NOTE(review): the previous list repeated ';', '?' and '!' in ASCII
     * form; presumably those were the full-width forms lost in an encoding
     * conversion, so they are restored here - confirm against the intended set.
     */
    private static final java.util.regex.Pattern SYMBOLS =
            java.util.regex.Pattern.compile("[,;.!?+\u3002\uFF1B\uFF1F\uFF01]");

    /** Line-feed and carriage-return characters. */
    private static final java.util.regex.Pattern LINE_BREAKS =
            java.util.regex.Pattern.compile("[\\n\\r]");

    /** The half-width space and the full-width (ideographic, U+3000) space. */
    private static final java.util.regex.Pattern SPACES =
            java.util.regex.Pattern.compile("[ \u3000]");

    /** A run of whitespace at the very start of the text. */
    private static final java.util.regex.Pattern LEADING_WHITESPACE =
            java.util.regex.Pattern.compile("^\\s+");

    /**
     * Counts the words of an article.
     *
     * <p>Every non-ASCII character (for example a CJK ideograph or a
     * full-width punctuation mark) counts as one word, and every maximal run
     * of non-whitespace ASCII characters counts as one word. Line breaks
     * always act as word separators.
     *
     * @param wordContent
     *            the article text; {@code null} or empty yields 0
     * @param compriseInterpunction
     *            {@code true} to keep the punctuation symbols in the text (an
     *            ASCII symbol then belongs to the surrounding word and a
     *            full-width symbol counts as a word); {@code false} to treat
     *            every symbol as a word separator
     * @param compriseSpace
     *            {@code true} to let a full-width space count like any other
     *            full-width character; {@code false} to treat it as a plain
     *            separator. ASCII spaces are separators in both cases.
     * @return the number of words after the requested filtering
     */
    public int getWordCount(String wordContent, boolean compriseInterpunction,
            boolean compriseSpace) {
        if (wordContent == null || wordContent.isEmpty()) {
            return 0;
        }
        // Everything that must not contribute to the count is turned into an
        // ASCII space, which countWords treats as a separator.
        String text = LINE_BREAKS.matcher(wordContent).replaceAll(" ");
        if (!compriseInterpunction) {
            text = SYMBOLS.matcher(text).replaceAll(" ");
        }
        if (!compriseSpace) {
            text = SPACES.matcher(text).replaceAll(" ");
        }
        return countWords(text);
    }

    /**
     * Counts words in already filtered text: one per non-ASCII code point and
     * one per maximal run of non-whitespace ASCII characters.
     *
     * <p>The classification is done on the code point value rather than on
     * the encoded byte length, so the result does not depend on the platform
     * default charset.
     *
     * @param text
     *            the filtered text, never {@code null}
     * @return the number of words found; 0 for blank text
     */
    private static int countWords(String text) {
        int count = 0;
        boolean insideAsciiWord = false;
        int index = 0;
        while (index < text.length()) {
            int codePoint = text.codePointAt(index);
            index += Character.charCount(codePoint);
            if (codePoint > 0x7F) {
                // A wide character is a word of its own and ends any ASCII run.
                count++;
                insideAsciiWord = false;
            } else if (Character.isWhitespace(codePoint)) {
                insideAsciiWord = false;
            } else if (!insideAsciiWord) {
                // First character of a new ASCII run.
                count++;
                insideAsciiWord = true;
            }
        }
        return count;
    }

    /**
     * Counts the characters of an article.
     *
     * <p>Whitespace at the start of the filtered text is never counted. When
     * spaces are included, each line-break character counts as one space.
     * Only the half-width space, the full-width space (U+3000) and line
     * breaks are dropped when spaces are excluded; other interior whitespace
     * such as tabs is still counted.
     *
     * @param wordContent
     *            the article text; {@code null} or empty yields 0
     * @param compriseInterpunction
     *            {@code true} to count the punctuation symbols, {@code false}
     *            to leave them out
     * @param compriseSpace
     *            {@code true} to count spaces, {@code false} to leave spaces
     *            and line breaks out
     * @return the number of characters after the requested filtering
     */
    public int getWordCharacter(String wordContent,
            boolean compriseInterpunction, boolean compriseSpace) {
        if (wordContent == null || wordContent.isEmpty()) {
            return 0;
        }
        String text = wordContent;
        if (!compriseInterpunction) {
            // Remove the symbols outright; replacing them with spaces would
            // keep them in the total whenever spaces are counted.
            text = SYMBOLS.matcher(text).replaceAll("");
        }
        if (compriseSpace) {
            text = LINE_BREAKS.matcher(text).replaceAll(" ");
        } else {
            text = SPACES.matcher(text).replaceAll("");
            text = LINE_BREAKS.matcher(text).replaceAll("");
        }
        text = LEADING_WHITESPACE.matcher(text).replaceFirst("");
        return text.length();
    }
}