This is an original article; the information in it may have evolved or changed since it was written.
StarDict dictionary idx file format:
Each entry in the word list contains three fields, stored one after the other:
word_str; a UTF-8 string terminated by '\0'.
A UTF-8 encoded string that ends with a '\0' terminator. The length of word_str must be less than 256 bytes.
word_data_offset; the word data's offset in the .dict file.
The offset of the word data in the .dict file.
If the version is "3.0.0" and "idxoffsetbits=64",
word_data_offset is a 64-bit unsigned number in network byte order; otherwise it is a 32-bit unsigned number in network byte order.
word_data_size; the word data's total size in the .dict file.
The total size of the word data in the .dict file. word_data_size is a 32-bit unsigned number
in network byte order.
// Command main converts a StarDict .idx file into a tab-separated text file.
//
// Every index entry (word_str, word_data_offset, word_data_size) becomes one
// output line of the form "word<TAB>offset<TAB>size<LF>", with the two numbers
// printed in decimal.
package main

import (
	"bufio"
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"os"
	"strconv"
)

const (
	// idxFileName is the StarDict index file that is read.
	idxFileName = "Gaojihanyudacidian_fix.idx"
	// outFileName is the text file that is created (or truncated) and written.
	outFileName = "output.txt"
)

// main converts idxFileName into outFileName. On failure it prints the error
// to standard error and exits with status 1.
func main() {
	if err := convertFile(idxFileName, outFileName); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Print("\nfinish read\n")
}

// convertFile opens idxPath, creates outPath and converts the index between
// them. The error from closing the output file is reported when nothing else
// failed, so a failed final write is not silently lost.
func convertFile(idxPath, outPath string) (err error) {
	in, err := os.Open(idxPath)
	if err != nil {
		return err
	}
	defer in.Close()

	out, err := os.Create(outPath)
	if err != nil {
		return err
	}
	defer func() {
		if cerr := out.Close(); err == nil {
			err = cerr
		}
	}()

	return convertIndex(in, out)
}

// convertIndex reads StarDict index entries from r until end of input and
// writes one "word<TAB>offset<TAB>size<LF>" line per entry to w.
//
// Each entry is a NUL-terminated word followed by two 32-bit unsigned
// big-endian (network byte order) numbers: the offset and the size of the
// word's data. Indexes that use 64-bit offsets are not handled.
//
// End of input exactly on an entry boundary is a normal end. End of input in
// the middle of an entry is returned as an error wrapping io.ErrUnexpectedEOF.
// Entries converted before an error are still flushed to w.
func convertIndex(r io.Reader, w io.Writer) (err error) {
	br := bufio.NewReader(r)
	bw := bufio.NewWriter(w)
	defer func() {
		if ferr := bw.Flush(); err == nil {
			err = ferr
		}
	}()

	var (
		fields [8]byte // word_data_offset followed by word_data_size
		line   []byte  // reused output buffer
	)

	for {
		// UTF-8 never encodes a zero byte inside a multi-byte sequence, so
		// scanning for the NUL terminator is safe for every character width.
		word, err := br.ReadBytes(0)
		if err != nil {
			if errors.Is(err, io.EOF) {
				if len(word) == 0 {
					return nil // clean end: no partial entry pending
				}
				err = io.ErrUnexpectedEOF
			}
			return fmt.Errorf("reading word_str: %w", err)
		}
		word = word[:len(word)-1] // drop the NUL terminator

		if _, err := io.ReadFull(br, fields[:]); err != nil {
			if errors.Is(err, io.EOF) {
				err = io.ErrUnexpectedEOF
			}
			return fmt.Errorf("reading offset and size of %q: %w", word, err)
		}
		offset := binary.BigEndian.Uint32(fields[:4])
		size := binary.BigEndian.Uint32(fields[4:])

		line = append(line[:0], word...)
		line = append(line, '\t')
		line = strconv.AppendUint(line, uint64(offset), 10)
		line = append(line, '\t')
		line = strconv.AppendUint(line, uint64(size), 10)
		line = append(line, '\n')
		if _, err := bw.Write(line); err != nil {
			return fmt.Errorf("writing entry %q: %w", word, err)
		}
	}
}