java擷取文章的字數或者字元數

來源:互聯網
上載者:User

 package util;
/**
 * Counts the words or the characters of an article.
 *
 * <p>"Words" are counted the way CJK word processors do it: every non-ASCII
 * character (CJK ideograph, full-width punctuation, ...) is one word on its
 * own, and every maximal run of ASCII characters other than the space is one
 * word. "Characters" are counted as UTF-16 code units, i.e.
 * {@link String#length()} of the filtered text.
 *
 * <p>The class holds no mutable state, so one instance may be shared freely.
 *
 * @author ffshi
 */
public class StatWordCount {

    /**
     * Punctuation marks that can be excluded from a count. The last entry is
     * the ideographic full stop (U+3002), written as an escape so the file
     * compiles regardless of the source encoding.
     *
     * NOTE(review): the original list repeated ';', '?' and '!'; the
     * duplicates were presumably full-width forms lost in transcoding.
     * Confirm and add U+FF1B / U+FF1F / U+FF01 if so.
     */
    private static final char[] CHS = { ',', ';', '.', '!', '?', '+', '\u3002' };

    /** Line-break characters. */
    private static final char[] CHN = { '\n', '\r' };

    /** Space characters: half-width (U+0020) first, full-width (U+3000) second. */
    private static final char[] SPACE = { ' ', '\u3000' };

    // None of the characters above is special inside a regex character class,
    // so the arrays can be dropped between brackets without any quoting.
    private static final String PUNCTUATION_REGEX = "[" + new String(CHS) + "]";
    private static final String LINE_BREAK_REGEX = "[" + new String(CHN) + "]";
    private static final String SPACE_REGEX = "[" + new String(SPACE) + "]";

    /** Matches the whitespace run at the very start of the text. */
    private static final String LEADING_WHITESPACE_REGEX = "^\\s+";

    /**
     * Counts the words of an article after applying the requested filters.
     *
     * <p>Line breaks always act as word separators. Half-width spaces are
     * always separators and are never counted themselves. A full-width space
     * is a non-ASCII character, so it is counted as a word of its own unless
     * {@code compriseSpace} is {@code false}.
     *
     * @param wordContent
     *            the article text; may be {@code null}
     * @param compriseInterpunction
     *            {@code true} to keep the punctuation marks in {@code CHS},
     *            {@code false} to turn them into word separators
     * @param compriseSpace
     *            {@code true} to keep full-width spaces, {@code false} to
     *            turn them into ordinary separators
     * @return the number of words, or 0 for {@code null}, empty or
     *         whitespace-only input
     */
    public int getWordCount(String wordContent, boolean compriseInterpunction,
            boolean compriseSpace) {
        if (wordContent == null || wordContent.length() == 0) {
            return 0;
        }
        // Every filtered character becomes a plain space so that it still
        // separates the words on either side of it.
        String text = wordContent.replaceAll(LINE_BREAK_REGEX, " ");
        if (!compriseInterpunction) {
            text = text.replaceAll(PUNCTUATION_REGEX, " ");
        }
        if (!compriseSpace) {
            text = text.replaceAll(SPACE_REGEX, " ");
        }
        return getWordCount(text);
    }

    /**
     * Counts the words in already-filtered text.
     *
     * <p>A character is treated as "wide" when it lies outside ASCII. This
     * replaces a test on the byte length in the platform default charset,
     * which only recognised CJK text when that charset was a double-byte one
     * such as GBK and miscounted under UTF-8.
     *
     * @param wordContent
     *            the text to scan; may be {@code null}
     * @return the number of wide characters plus the number of runs of ASCII
     *         non-space characters
     */
    private int getWordCount(String wordContent) {
        if (wordContent == null) {
            return 0;
        }
        String text = wordContent.trim();
        int count = 0;
        boolean inAsciiWord = false;
        // Walk by code point so a supplementary character counts once.
        for (int i = 0; i < text.length();) {
            int codePoint = text.codePointAt(i);
            i += Character.charCount(codePoint);
            if (codePoint > 0x7F) {
                // A wide character is a word by itself and ends any ASCII word.
                count++;
                inAsciiWord = false;
            } else if (codePoint == ' ') {
                inAsciiWord = false;
            } else if (!inAsciiWord) {
                // First character of a new ASCII word.
                count++;
                inAsciiWord = true;
            }
        }
        return count;
    }

    /**
     * Counts the characters of an article after applying the requested filters.
     *
     * <p>When spaces are included, each line-break character counts as one
     * space and leading whitespace is not counted. When spaces are excluded,
     * spaces and line breaks are removed before counting.
     *
     * @param wordContent
     *            the article text; may be {@code null}
     * @param compriseInterpunction
     *            {@code true} to count the punctuation marks in {@code CHS},
     *            {@code false} to leave them out
     * @param compriseSpace
     *            {@code true} to count spaces, {@code false} to leave them out
     * @return the number of characters (UTF-16 code units) left after
     *         filtering, or 0 for {@code null} input
     */
    public int getWordCharacter(String wordContent,
            boolean compriseInterpunction, boolean compriseSpace) {
        if (wordContent == null) {
            return 0;
        }
        String text = wordContent;
        if (!compriseInterpunction) {
            // Remove outright: replacing with a space would still count them
            // whenever spaces are counted.
            text = text.replaceAll(PUNCTUATION_REGEX, "");
        }
        if (!compriseSpace) {
            return getNoSpaceCount(text);
        }
        text = text.replaceAll(LINE_BREAK_REGEX, " ");
        text = text.replaceAll(LEADING_WHITESPACE_REGEX, "");
        return text.length();
    }

    /**
     * Counts the characters that are neither spaces nor line breaks.
     *
     * @param wordContent
     *            the text to scan; may be {@code null}
     * @return the length of the text once leading whitespace, half- and
     *         full-width spaces and line breaks have been removed
     */
    private int getNoSpaceCount(String wordContent) {
        if (wordContent == null || wordContent.length() == 0) {
            return 0;
        }
        String text = wordContent.replaceAll(LEADING_WHITESPACE_REGEX, "");
        text = text.replaceAll(SPACE_REGEX, "");
        text = text.replaceAll(LINE_BREAK_REGEX, "");
        return text.length();
    }
}

相關文章

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里雲無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support, 6 Free Tickets per Quarter, Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.