標籤:mystra 編程演算法 字典分詞 深度優先搜尋 代碼
字典分詞 代碼(C++)
本文地址: http://blog.csdn.net/caroline_wendy
給定字典, 給定一句話, 進行分詞.
使用深度優先遍歷(DFS)的方法.
使用一個參數string, 儲存當前分支的分詞後的句子; 使用一個參數vector, 儲存所有可能的組合.
使用一個驗證函式, 預先判斷句子是否有可能分詞: 句子中的每個字元都必須在字典中出現過(這是必要條件, 並非充分條件).
代碼:
/*
 * main.cpp
 *
 * Created on: 2014.9.18
 *     Author: Spike
 * Copyright (c) 2014 WCL. All rights reserved.
 */
/* eclipse cdt, gcc 4.8.1 */

#include <iostream>
#include <set>
#include <string>
#include <vector>

using namespace std;

/**
 * @brief Tests whether `m` is a prefix of `s` (byte-wise comparison).
 *
 * @param s Text to inspect.
 * @param m Candidate prefix. An empty `m` is a prefix of every string.
 * @return true when the first `m.length()` bytes of `s` equal `m`.
 */
bool Match(const string& s, const string& m) {
    // compare() clamps the compared range to s.length(), so a too-short `s`
    // simply compares unequal; no temporary substring is allocated.
    return s.compare(0, m.length(), m) == 0;
}

/**
 * @brief Cheap pre-check before the exhaustive search.
 *
 * Verifies that every byte of `s` occurs in at least one dictionary word.
 * This is a necessary condition only: a sentence that passes may still have
 * no segmentation.
 *
 * @param s    Sentence to segment.
 * @param dict Dictionary words.
 * @return false when `s` contains a byte absent from every dictionary word,
 *         true otherwise (including for an empty `s`).
 */
bool Validate(const string& s, const vector<string>& dict) {
    // Collect every byte used by the dictionary.
    set<char> dict_chars;
    for (const string& word : dict) {
        dict_chars.insert(word.begin(), word.end());
    }

    // Every byte of the sentence must be covered by that alphabet.
    for (char c : s) {
        if (dict_chars.find(c) == dict_chars.end()) {
            return false;
        }
    }
    return true;
}

/**
 * @brief Depth-first enumeration of every segmentation of `s`.
 *
 * At each step every dictionary word that is a prefix of the remaining text
 * is tried in dictionary order; the word followed by "~" is appended to the
 * sentence built so far and the search continues on the rest of the text.
 * When the text is exhausted the built sentence is appended to `list`.
 *
 * @param s    Remaining, not yet segmented text.
 * @param dict Dictionary words. Empty words are ignored: they consume no
 *             input and would otherwise make the recursion endless.
 * @param cur  Segmented sentence accumulated on the current branch.
 * @param list Receives every complete segmentation (each word is followed by
 *             "~", including the last one).
 * @return Legacy sentinel kept for interface compatibility: an empty string
 *         when `s` is empty, "No Result" otherwise. It does not tell whether
 *         a segmentation was found; inspect `list` instead.
 */
string Split(const string& s, const vector<string>& dict, const string& cur,
             vector<string>& list) {
    if (s.empty()) {
        list.push_back(cur);
        return string();
    }

    for (const string& word : dict) {
        if (word.empty() || !Match(s, word)) {
            continue;
        }
        // Each branch gets its own extended sentence, so there is no state
        // to restore after the recursive call returns.
        Split(s.substr(word.length()), dict, cur + word + "~", list);
    }
    return "No Result";
}

/**
 * @brief Returns every way to segment `s` into dictionary words.
 *
 * @param s    Sentence to segment.
 * @param dict Dictionary words.
 * @return All segmentations in depth-first, dictionary order; each word is
 *         followed by "~". Empty when `s` cannot be segmented.
 */
vector<string> SplitWords(const string& s, const vector<string>& dict) {
    vector<string> list;
    if (Validate(s, dict)) {
        Split(s, dict, "", list);
    }
    return list;
}

// Define DICT_SEGMENT_NO_MAIN to reuse the functions above without the demo
// driver (for example from a unit test that supplies its own main).
#ifndef DICT_SEGMENT_NO_MAIN
int main() {
    const vector<string> dict = {"程式員", "公務員", "員", "我", "喜", "做",
                                 "程式", "一", "歡", "喜歡", "做一個", "一個"};
    const vector<string> words = SplitWords("我喜歡做一個程式員", dict);
    for (const string& sentence : words) {
        cout << sentence << '\n';
    }
    return 0;
}
#endif  // DICT_SEGMENT_NO_MAIN
輸出:
我~喜~歡~做~一個~程式員~
我~喜~歡~做~一個~程式~員~
我~喜~歡~做一個~程式員~
我~喜~歡~做一個~程式~員~
我~喜歡~做~一個~程式員~
我~喜歡~做~一個~程式~員~
我~喜歡~做一個~程式員~
我~喜歡~做一個~程式~員~
編程演算法 - 字典分詞 代碼(C++)