本文以實例講述了 Java 實現求兩個字串最大公用子串的方法。分享給大家供大家參考,具體如下:
最近在項目工作中有一個關於文本對比的需求,經過這段時間的學習,總結了這篇部落格內容:求兩個字串的最大公用子串。
演算法思想:基於圖計算兩字串的公用子串。具體演算法思想參照下圖:
輸入字串S1:achmacmh 輸入字串S2:macham
- 第a步,是將字串s1,s2分別按字元拆分,構成一個二維數組;
- 二維數組中的值如b所示,比如第一行第一列的值表示字串s2和s1的第一個字元是否相等,若相等就是1,否則就是0,最終產生b所示的二維數組;
- 分別求二維數組中斜線上的公用因子(斜線為元素a右下角值,即a[i][j]的下一個元素是a[i+1][j+1];公用因子為1所在的位置構成的字串);
- 對所有公用因子排序,返回最大的公用因子的值。
具體的實現代碼如下所示:
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

/**
 * Finds the longest common substring(s) of two strings by building a
 * character-match matrix and scanning its diagonals.
 *
 * <p>This version records every run of matching characters found on any
 * diagonal, sorts the runs by length (longest first) and returns all runs that
 * share the maximum length.
 */
public class StringCompare {

    /**
     * Returns every longest common substring of {@code s1} and {@code s2}.
     *
     * <p>Characters are compared with {@code charAt}, i.e. per UTF-16 code unit.
     * The method allocates a {@code boolean[s1.length()][s2.length()]} matrix.
     *
     * @param s1 first string (matrix rows)
     * @param s2 second string (matrix columns); result text is taken from it
     * @return {@code null} if either argument is {@code null}; {@code ""} if
     *         either string is empty or the strings share no character;
     *         otherwise each longest common substring followed by {@code "\n"}.
     *         Runs on diagonals starting in column 0 (top to bottom) come
     *         first, then runs on diagonals starting in row 0. The same text
     *         can appear more than once when it matches at several alignments.
     */
    public String getMaxLengthCommonString(String s1, String s2) {
        if (s1 == null || s2 == null) {
            return null;
        }
        int rows = s1.length(); // s1 indexes the rows
        int cols = s2.length(); // s2 indexes the columns
        if (rows == 0 || cols == 0) {
            return "";
        }

        // Match matrix: array[i][j] is true when s1[i] == s2[j].
        boolean[][] array = new boolean[rows][cols];
        for (int i = 0; i < rows; i++) {
            char c1 = s1.charAt(i);
            for (int j = 0; j < cols; j++) {
                array[i][j] = (c1 == s2.charAt(j));
            }
        }

        // Collect every non-empty run of matches; each run stores its start
        // position in s2 and its length.
        List<ChildString> childStrings = new ArrayList<ChildString>();
        for (int i = 0; i < rows; i++) {
            getMaxSort(i, 0, array, childStrings);
        }
        for (int j = 1; j < cols; j++) {
            getMaxSort(0, j, array, childStrings);
        }

        // No run at all means the strings have no character in common.
        if (childStrings.isEmpty()) {
            return "";
        }

        // Longest runs first; Collections.sort is stable, so runs of equal
        // length keep their discovery order.
        sort(childStrings);

        int max = childStrings.get(0).maxLength;
        StringBuilder sb = new StringBuilder();
        for (ChildString s : childStrings) {
            if (s.maxLength != max) {
                break;
            }
            sb.append(s2.substring(s.maxStart, s.maxStart + s.maxLength));
            sb.append("\n");
        }
        return sb.toString();
    }

    /** Sorts the runs by length in descending order. */
    private static void sort(List<ChildString> list) {
        Collections.sort(list, new Comparator<ChildString>() {
            @Override
            public int compare(ChildString o1, ChildString o2) {
                // Lengths are never negative, so the subtraction cannot overflow.
                return o2.maxLength - o1.maxLength;
            }
        });
    }

    /**
     * Walks the diagonal that starts at {@code (i, j)} towards the lower right
     * and appends every non-empty run of consecutive matches to
     * {@code sortBean}.
     *
     * @param i        starting row
     * @param j        starting column
     * @param array    match matrix (non-empty, rectangular)
     * @param sortBean receives the runs found on this diagonal
     */
    private static void getMaxSort(int i, int j, boolean[][] array, List<ChildString> sortBean) {
        int rows = array.length;
        int cols = array[0].length;
        int length = 0;
        int start = j;
        for (; i < rows && j < cols; i++, j++) {
            if (array[i][j]) {
                length++;
            } else {
                // A mismatch closes the current run; empty runs are skipped so
                // they never show up as blank lines in the result.
                if (length > 0) {
                    sortBean.add(new ChildString(length, start));
                }
                length = 0;
                start = j + 1;
            }
        }
        // The diagonal ended while a run was still open.
        if (length > 0) {
            sortBean.add(new ChildString(length, start));
        }
    }

    /** A run of matching characters: its length and start index in the second string. */
    static class ChildString {
        final int maxLength;
        final int maxStart;

        ChildString(int maxLength, int maxStart) {
            this.maxLength = maxLength;
            this.maxStart = maxStart;
        }
    }

    /**
     * Demo entry point: prints the longest common substrings of two sample strings.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        System.out.println(new StringCompare().getMaxLengthCommonString("achmacmh", "macham"));
    }
}
程式最終執行結果是(每個最大公用子串各佔一行):mac、ach
對於兩個檔案的比對,個人認為可以參照這種演算法思想(自己現在並未實現),在日後的部落格中將會寫到。
上述實現過程中,用數組儲存了所有的公用子串資訊,然後排序取最大的子串,這種做法如果只是求最大子串的話,演算法就不是很合理,因此做了如下修改,List只儲存當前計算中最大的子串,具體實現如下:
import java.util.ArrayList;
import java.util.List;

/**
 * String comparison: finds the longest common substring(s) of two strings by
 * building a character-match matrix and scanning its diagonals.
 *
 * <p>Unlike a collect-everything-then-sort approach, this version keeps only
 * the runs whose length equals the longest length seen so far.
 */
public class StringCompare {

    /**
     * Returns every longest common substring of {@code s1} and {@code s2}.
     *
     * <p>All working state is local to the call, so one instance can be used
     * for any number of calls. Characters are compared with {@code charAt},
     * i.e. per UTF-16 code unit. The method allocates a
     * {@code boolean[s1.length()][s2.length()]} matrix.
     *
     * @param s1 first string (matrix rows)
     * @param s2 second string (matrix columns); result text is taken from it
     * @return {@code null} if either argument is {@code null}; {@code ""} if
     *         either string is empty or the strings share no character;
     *         otherwise each longest common substring followed by {@code "\n"}.
     *         Runs on diagonals starting in column 0 (top to bottom) come
     *         first, then runs on diagonals starting in row 0. The same text
     *         can appear more than once when it matches at several alignments.
     */
    public String getMaxLengthCommonString(String s1, String s2) {
        if (s1 == null || s2 == null) {
            return null;
        }
        int rows = s1.length(); // s1 indexes the rows
        int cols = s2.length(); // s2 indexes the columns
        if (rows == 0 || cols == 0) {
            return "";
        }

        // Match matrix: array[i][j] is true when s1[i] == s2[j].
        boolean[][] array = new boolean[rows][cols];
        for (int i = 0; i < rows; i++) {
            char c1 = s1.charAt(i);
            for (int j = 0; j < cols; j++) {
                array[i][j] = (c1 == s2.charAt(j));
            }
        }

        // childStrings only ever holds runs of the current maximum length.
        List<ChildString> childStrings = new ArrayList<ChildString>();
        int maxLength = 0;
        for (int i = 0; i < rows; i++) {
            maxLength = getMaxSort(i, 0, array, childStrings, maxLength);
        }
        for (int j = 1; j < cols; j++) {
            maxLength = getMaxSort(0, j, array, childStrings, maxLength);
        }

        StringBuilder sb = new StringBuilder();
        for (ChildString s : childStrings) {
            sb.append(s2.substring(s.maxStart, s.maxStart + s.maxLength));
            sb.append("\n");
        }
        return sb.toString();
    }

    /**
     * Walks the diagonal that starts at {@code (i, j)} towards the lower right
     * and offers every run of consecutive matches to {@link #keepIfLongest}.
     *
     * @param i         starting row
     * @param j         starting column
     * @param array     match matrix (non-empty, rectangular)
     * @param sortBean  runs of the current maximum length; updated in place
     * @param maxLength longest run length seen before this diagonal
     * @return the longest run length seen after this diagonal
     */
    private static int getMaxSort(int i, int j, boolean[][] array,
            List<ChildString> sortBean, int maxLength) {
        int rows = array.length;
        int cols = array[0].length;
        int length = 0;
        int start = j;
        for (; i < rows && j < cols; i++, j++) {
            if (array[i][j]) {
                length++;
            } else {
                // A mismatch closes the current run.
                maxLength = keepIfLongest(length, start, sortBean, maxLength);
                length = 0;
                start = j + 1;
            }
        }
        // The diagonal ended; close whatever run was still open.
        return keepIfLongest(length, start, sortBean, maxLength);
    }

    /**
     * Records a run only if it is at least as long as the longest run so far.
     * A strictly longer run discards everything recorded before it. Empty runs
     * are ignored so they never show up as blank lines in the result.
     *
     * @return the (possibly increased) maximum run length
     */
    private static int keepIfLongest(int length, int start,
            List<ChildString> sortBean, int maxLength) {
        if (length == 0) {
            return maxLength;
        }
        if (length > maxLength) {
            sortBean.clear();
            maxLength = length;
        }
        if (length == maxLength) {
            sortBean.add(new ChildString(length, start));
        }
        return maxLength;
    }

    /** A run of matching characters: its length and start index in the second string. */
    static class ChildString {
        final int maxLength;
        final int maxStart;

        ChildString(int maxLength, int maxStart) {
            this.maxLength = maxLength;
            this.maxStart = maxStart;
        }
    }

    /**
     * Demo entry point: prints the longest common substrings of two sample strings.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        System.out.println(new StringCompare().getMaxLengthCommonString("abcdef", "defabc"));
    }
}
感謝閱讀,希望能幫助到大家,謝謝大家對本站的支援!