這是一篇較早以前建立的文章,其中的資訊可能已經有所發展或是發生改變。
StarDict 詞典idx檔案格式:
單字清單中的每一個條目依序包含三個域:
word_str; // a utf-8 string terminated by '\0'.
// 一個 utf-8 編碼字串,以 '\0' 終止符結束。word_str 的長度必須小於 256 個位元組。
word_data_offset; // word data's offset in .dict file
// 單詞資料在 .dict 檔案中的位移,預設為網路位元組序的 32 位元無號整數。
//If the version is "3.0.0" and "idxoffsetbits=64",
//word_data_offset will be 64-bits unsigned number in network byte order.
word_data_size; // word data's total size in .dict file
// 單詞資料在 .dict 檔案中的總大小,word_data_size should be 32-bits unsigned number
// in network byte order.
// Command main converts a StarDict .idx index file into a tab-separated text
// listing with one line per entry: "word<TAB>offset<TAB>size<LF>".
//
// Each .idx entry consists of three consecutive fields:
//
//	word_str          NUL-terminated UTF-8 string
//	word_data_offset  32-bit unsigned integer, network byte order
//	word_data_size    32-bit unsigned integer, network byte order
//
// Indexes written with "idxoffsetbits=64" (64-bit offsets) are not supported.
package main

import (
	"bufio"
	"encoding/binary"
	"fmt"
	"io"
	"os"
	"strconv"
)

const (
	// idxPath is the StarDict index file that is read.
	idxPath = "gaojihanyudacidian_fix.idx"
	// outPath is the text file that is (re)created with the decoded entries.
	outPath = "output.txt"
	// entryTailLen is the number of bytes following each word: a 4-byte
	// offset plus a 4-byte size.
	entryTailLen = 8
)

// main converts idxPath into outPath. On failure it reports the error on
// standard error and exits with status 1.
func main() {
	if err := convertFile(idxPath, outPath); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Print("\nfinish read\n")
}

// convertFile opens src, creates dst, and writes one text line per index
// entry. Entries decoded before an error are still flushed to dst, so a
// truncated index yields all of its complete entries plus a non-nil error.
func convertFile(src, dst string) (err error) {
	in, err := os.Open(src)
	if err != nil {
		return err
	}
	defer in.Close()

	out, err := os.Create(dst)
	if err != nil {
		return err
	}
	// A failed Close on a written file can mean lost data, so surface it
	// unless an earlier error is already being returned.
	defer func() {
		if cerr := out.Close(); err == nil {
			err = cerr
		}
	}()

	w := bufio.NewWriter(out)
	err = convertIndex(bufio.NewReader(in), w)
	if err != nil {
		err = fmt.Errorf("converting %s: %w", src, err)
	}
	if ferr := w.Flush(); err == nil {
		err = ferr
	}
	return err
}

// convertIndex decodes .idx entries from r until end of input and writes
// "word\toffset\tsize\n" to w for each one.
//
// The word is copied byte-for-byte up to its NUL terminator; no UTF-8
// decoding is required, so headwords containing 4-byte sequences are handled
// like any other. It returns nil when the input ends exactly on an entry
// boundary and an error wrapping io.ErrUnexpectedEOF when it ends inside an
// entry.
func convertIndex(r *bufio.Reader, w io.Writer) error {
	var tail [entryTailLen]byte
	var line []byte // reused across entries to avoid per-entry allocation

	for entry := 1; ; entry++ {
		word, err := r.ReadBytes(0)
		if err == io.EOF {
			if len(word) == 0 {
				return nil // clean end of index
			}
			return fmt.Errorf("entry %d: word is not NUL-terminated: %w", entry, io.ErrUnexpectedEOF)
		}
		if err != nil {
			return fmt.Errorf("entry %d: reading word: %w", entry, err)
		}
		word = word[:len(word)-1] // drop the NUL terminator

		if _, err := io.ReadFull(r, tail[:]); err != nil {
			if err == io.EOF {
				err = io.ErrUnexpectedEOF
			}
			return fmt.Errorf("entry %d (%q): reading offset and size: %w", entry, word, err)
		}
		offset := binary.BigEndian.Uint32(tail[0:4])
		size := binary.BigEndian.Uint32(tail[4:8])

		line = append(line[:0], word...)
		line = append(line, '\t')
		line = strconv.AppendUint(line, uint64(offset), 10)
		line = append(line, '\t')
		line = strconv.AppendUint(line, uint64(size), 10)
		line = append(line, '\n')
		if _, err := w.Write(line); err != nil {
			return fmt.Errorf("entry %d: writing output: %w", entry, err)
		}
	}
}