While convenient and practical, Java's Map also wastes a great deal of memory. Once a map holds 10 million or more entries, it needs several GB of memory. The map discussed here is a HashMap<String, Byte>. On average, each key is about 20 characters long and never exceeds 200 characters. In practice, about 5/6 of the memory goes to overhead unrelated to the data actually stored. For data that is written once and read many times, there is no need to waste so many resources. The following is a description of a simple implementation. Algorithm description: Put the entries into different queues according to the length of their keys, and keep each queue sorted by key; for convenience, the entries can also be sorted before they are loaded into memory. This produces many queues. To answer a query, select the queue that matches the length of the query term and search it with binary search. Algorithm applicability: It is suited to scenarios where keys are concentrated in a certain range and values are of a simple type (such as byte, int, long, float, or double), with a data volume of more than one million records and insufficient memory. View plaincopy to clipboardprint? ··· · 50 ······· · 90 ····· · 140 · 150 Import java. Io. bufferedinputstream; Import java. Io. bufferedreader; Import java. Io. datainputstream; Import java. Io. file; Import java. Io. fileinputstream; Import java. Io. filereader; Import java. Io. ioexception; Import java. util. hashmap; Import java. util. Map; Public class bsortmap {
Private Static Map <integer, bsortmap> bulk = new hashmap <integer, bsortmap> ();
Final private byte [] entrys; // entry Array Private int COUNT = 0; Final private int keylength; // the size of the key in bytes Final private int entrylength; // number of bytes for each entry
Public bsortmap (INT capacity, int keylen ){ This. keylength = keylen; Entrylength = keylen + 1; Entrys = new byte [capacity * entrylength]; }
Public int size (){ Return count; }
/** * Add record entries. The entries are sorted. * The SRC format is <key, value> * @ Param SRC */ Final public void add (byte SRC []) { System. arraycopy (SRC, 0, entrys, count * entrylength, entrylength ); Count ++; }
Final private int compare (final int begin, final byte [] B ){ Int I = 0; For (; entrys [begin + I] = B [I] & I <B. Length-1; I ++) ; Return entrys [begin + I]-B [I]; }
/** * Obtain the value associated with the key * @ Param key * @ Return if the value associated with the key does not exist,-1 is returned. */ Final public byte get (final byte [] Key ){ Int I = 0; Int J = count-1; Int mid; While (I <= J ){ Mid = (I + J)> 1; Final int ret = compare (mid * entrylength, key ); If (ret = 0 ){ Return entrys [Mid * entrylength + keylength]; // return the result } Else if (Ret <0 ){ I = Mid + 1; } Else { J = mid-1; }
} Return-1;
}
/**
 * Loads every entry file found in the sort directory into a per-key-length
 * table, then reads the keyword file line by line and counts how many keywords
 * are present. Prints the elapsed lookup time in milliseconds, the hit count,
 * and the total number of loaded entries.
 *
 * @param args optional overrides: {@code args[0]} is the directory of entry
 *             files, {@code args[1]} the keyword file; the original hard-coded
 *             paths are used when they are omitted
 * @throws IOException if the directory cannot be listed or a file cannot be read
 */
public static void main(String args[]) throws IOException {
    final File dir = new File(args.length > 0 ? args[0] : "D:/workspace/partion_keyword/sort");
    final File[] files = dir.listFiles();
    if (files == null) {
        // listFiles() returns null when the path is not a readable directory.
        throw new IOException("cannot list directory: " + dir);
    }
    for (File f : files) {
        if (f.isFile()) {
            load(f);
        }
    }

    final String keywordPath =
            args.length > 1 ? args[1] : "D:/Eclipse/workspace/CONF/wiki_kws.data";
    final BufferedReader read = new BufferedReader(new FileReader(keywordPath));
    try {
        String line;
        int count = 0;
        final long start = System.currentTimeMillis();
        while ((line = read.readLine()) != null) {
            // Platform default charset, as in the original listing.
            // NOTE(review): must match the encoding used to build the entry files.
            final byte key[] = line.trim().getBytes();
            final bsortmap bt = bulk.get(key.length);
            if (bt != null && bt.get(key) != -1) {
                count++;
            }
        }
        final long end = System.currentTimeMillis();
        System.out.println(end - start);
        System.out.println("count:" + count);

        int totalcount = 0;
        for (bsortmap s : bulk.values()) {
            totalcount += s.size();
        }
        System.out.println("Total count:" + totalcount);
    } finally {
        read.close();
    }

    /*
     * Disabled interactive lookup loop kept from the original listing
     * (identifiers reconstructed from a garbled copy; verify before enabling):
     *
     * while (true) {
     *     Scanner in = new Scanner(System.in);
     *     String key = in.next().trim();
     *     if (key.equalsIgnoreCase("exit"))
     *         break;
     *     int len = key.trim().getBytes().length;
     *     bsortmap bt = bulk.get(len);
     *     byte v = bt.get(key.getBytes());
     *     System.err.println(key + "=" + v);
     * }
     */
}

/**
 * Reads one entry file and registers its table in {@code bulk} under the
 * file's key length, replacing any table already registered for that length.
 *
 * <p>File format: an int with the total number of entries, an int with the key
 * length in bytes, then the {@code <key, value>} records, sorted by key.
 */
private static void load(File f) throws IOException {
    final DataInputStream in =
            new DataInputStream(new BufferedInputStream(new FileInputStream(f)));
    try {
        final int entryscount = in.readInt();
        final int keylength = in.readInt();
        final byte[] buffer = new byte[keylength + 1];
        final bsortmap bst = new bsortmap(entryscount, keylength);

        int i = 0;
        // Stop at the declared count so surplus bytes cannot overrun the table.
        while (i < entryscount) {
            final int l = readEntry(in, buffer);
            if (l == 0) {
                break; // clean end of file
            }
            if (l != buffer.length) {
                // Truncated record: report it once and stop reading this file.
                System.err.println("not equal." + l);
                break;
            }
            bst.add(buffer);
            i++;
        }
        bulk.put(keylength, bst);
        if (entryscount != i) {
            System.err.println(f.getName() + ":" + entryscount + "," + i);
        }
    } finally {
        in.close();
    }
}

/**
 * Fills {@code buffer} from {@code in}, retrying after short reads.
 *
 * @return the number of bytes read; less than {@code buffer.length} only when
 *         the end of the stream was reached first
 */
private static int readEntry(DataInputStream in, byte[] buffer) throws IOException {
    int filled = 0;
    while (filled < buffer.length) {
        final int n = in.read(buffer, filled, buffer.length - filled);
        if (n < 0) {
            break;
        }
        filled += n;
    }
    return filled;
}
}
Test results: |