This is a small bitmap algorithm for big-data tasks: external sorting (when memory cannot hold all of the elements to be sorted), removing duplicate elements, and quickly finding elements that were randomly deleted. The core idea is to use each number as a subscript (index) into an array of bits, where the bit at that index indicates whether the number exists. The time complexity of sorting is O(N) and the additional space required is O(N/8). An example of the algorithm that supports the entire int range (positive and negative) is as follows:
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

/* Single-bit masks for one byte, most significant bit first:
 * BitMask[k] selects bit k (k = 0..7) of a byte. Unsigned so that 0x80 is
 * representable without an implementation-defined conversion. */
static const unsigned char BitMask[] = {0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1};

/**
 * Mark a signed number as present in the bitmap.
 *
 * The bitmap of ByteArraSize bytes covers the symmetric range
 * [-ByteArraSize*4, ByteArraSize*4 - 1]; the bit for value v lives at bit
 * position ByteArraSize*4 + v.
 *
 * @param ByteArra     bitmap storage; must be zero-initialised before first use
 * @param ByteArraSize size of the bitmap in bytes
 * @param number       value to record
 * @return 1 on success, 0 if ByteArra is NULL or number is outside the range
 */
int WriteNumberBitToByte(char *ByteArra, unsigned int ByteArraSize, int number)
{
    /* Half of the bits (ByteArraSize * 8 / 2) are reserved for negatives.
     * long long keeps the arithmetic from overflowing for large bitmaps. */
    const long long HalfBits = (long long)ByteArraSize * 4;

    if (ByteArra == NULL || number >= HalfBits || number < -HalfBits) {
        return 0; /* failed: number is outside the bitmap */
    }

    const long long BitPos = HalfBits + number;
    /* Access through unsigned char so bit 0x80 never touches a sign bit. */
    unsigned char *Bytes = (unsigned char *)ByteArra;
    Bytes[BitPos / 8] |= BitMask[BitPos % 8];
    return 1; /* success */
}

/**
 * Test whether a signed number has been recorded in the bitmap.
 *
 * Uses the same layout as WriteNumberBitToByte.
 *
 * @param ByteArra     bitmap storage
 * @param ByteArraSize size of the bitmap in bytes
 * @param number       value to look up
 * @return 1 if the bit for number is set, 0 if it is clear, ByteArra is NULL,
 *         or number is outside the range covered by the bitmap
 */
int IsNumberBitInByte(char *ByteArra, unsigned int ByteArraSize, int number)
{
    const long long HalfBits = (long long)ByteArraSize * 4;

    if (ByteArra == NULL || number >= HalfBits || number < -HalfBits) {
        return 0; /* failed: number is outside the bitmap */
    }

    const long long BitPos = HalfBits + number;
    const unsigned char *Bytes = (const unsigned char *)ByteArra;
    return (Bytes[BitPos / 8] & BitMask[BitPos % 8]) != 0;
}

/**
 * Print every number recorded in the bitmap in ascending order, each followed
 * by a comma, then a newline.
 *
 * @param BitMap      bitmap storage
 * @param BitMapCount size of the bitmap in bytes
 */
void PrintOrderedBitMap(char *BitMap, unsigned int BitMapCount)
{
    const long long HalfBits = (long long)BitMapCount * 4;
    long long Lowest = -HalfBits;
    long long Highest = HalfBits - 1;

    /* The lookup takes an int, so never iterate past what an int can hold. */
    if (Lowest < INT_MIN) {
        Lowest = INT_MIN;
    }
    if (Highest > INT_MAX) {
        Highest = INT_MAX;
    }

    for (long long Value = Lowest; Value <= Highest; ++Value) {
        if (IsNumberBitInByte(BitMap, BitMapCount, (int)Value)) {
            printf("%d,", (int)Value);
        }
    }
    printf("\n");
}

/* Demo: sort a small array of signed integers by recording them in a bitmap
 * and printing the set bits in order. */
int main(void)
{
    const int arra[] = {3, -4, 2, 0, -1, -8, 7, -12, 10};
    const size_t Count = sizeof(arra) / sizeof(arra[0]);

    /* Largest absolute value in the input; computed in unsigned arithmetic so
     * that negating INT_MIN is well defined. */
    unsigned int MaximumValue = 0;
    for (size_t i = 0; i < Count; ++i) {
        const unsigned int Magnitude =
            arra[i] < 0 ? 0u - (unsigned int)arra[i] : (unsigned int)arra[i];
        if (MaximumValue < Magnitude) {
            MaximumValue = Magnitude;
        }
    }
    printf("maximumvalue=%u\n", MaximumValue);

    /* The bitmap covers [-4*BitMapCount, 4*BitMapCount - 1], so the positive
     * side needs 4*BitMapCount > MaximumValue. MaximumValue / 4 + 1 guarantees
     * that (and is never zero); (MaximumValue + 3) / 4 would drop a positive
     * maximum that is a multiple of 4. */
    const unsigned int BitMapCount = MaximumValue / 4 + 1;

    /* calloc: every bit must start cleared, otherwise stale heap contents
     * would show up as numbers that were never inserted. */
    char *BitMap = calloc(BitMapCount, 1);
    if (BitMap == NULL) {
        fprintf(stderr, "out of memory\n");
        return EXIT_FAILURE;
    }

    for (size_t i = 0; i < Count; ++i) {
        WriteNumberBitToByte(BitMap, BitMapCount, arra[i]);
    }
    PrintOrderedBitMap(BitMap, BitMapCount);

    free(BitMap);
    return 0;
}
An example of an algorithm that supports only the unsigned int range is as follows:
/* Single-bit masks for one byte, most significant bit first:
 * BitMask[k] selects bit k (k = 0..7) of a byte. Unsigned so that 0x80 is
 * representable without an implementation-defined conversion. */
static const unsigned char BitMask[] = {0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1};

/**
 * Mark an unsigned number as present in the bitmap.
 *
 * The bitmap of ByteArraSize bytes covers the range [0, ByteArraSize*8 - 1];
 * value v is stored in byte v / 8 at bit v % 8 (most significant bit first).
 *
 * @param ByteArra     bitmap storage; must be zero-initialised before first use
 * @param ByteArraSize size of the bitmap in bytes
 * @param number       value to record
 * @return 1 on success, 0 if ByteArra is null or number is outside the range
 */
int WriteNumberBitToByte(char *ByteArra, unsigned int ByteArraSize, unsigned int number)
{
    const unsigned int BytePos = number / 8;
    const unsigned int BitPos = number % 8;

    /* Compare byte indices instead of computing ByteArraSize * 8 - 1: that
     * expression wraps to UINT_MAX for an empty bitmap (accepting every
     * number) and overflows for very large bitmaps. */
    if (!ByteArra || BytePos >= ByteArraSize) {
        return 0; /* failed: number is outside the bitmap */
    }

    /* Access through unsigned char so bit 0x80 never touches a sign bit. */
    unsigned char *Bytes = (unsigned char *)ByteArra;
    Bytes[BytePos] |= BitMask[BitPos];
    return 1; /* success */
}

/**
 * Test whether an unsigned number has been recorded in the bitmap.
 *
 * Uses the same layout as WriteNumberBitToByte.
 *
 * @param ByteArra     bitmap storage
 * @param ByteArraSize size of the bitmap in bytes
 * @param number       value to look up
 * @return 1 if the bit for number is set, 0 if it is clear, ByteArra is null,
 *         or number is outside the range covered by the bitmap
 */
int IsNumberBitInByte(char *ByteArra, unsigned int ByteArraSize, unsigned int number)
{
    const unsigned int BytePos = number / 8;
    const unsigned int BitPos = number % 8;

    if (!ByteArra || BytePos >= ByteArraSize) {
        return 0; /* failed: number is outside the bitmap */
    }

    const unsigned char *Bytes = (const unsigned char *)ByteArra;
    return (Bytes[BytePos] & BitMask[BitPos]) != 0;
}
The algorithm above uses one bit to represent each number, so there are only two possible states: the number is either present or absent. It can be extended to use one byte per number, which makes it possible to count repeated elements that occur up to 255 times; the principle is the same.
In addition, using a single bit to represent an int number saves 31/32 of the memory space: an int occupies 4 × 8 = 32 bits, whereas the bitmap needs only 1 bit (1/8 of a byte). The larger the amount of data, the more obvious this advantage becomes, provided the scenario is suited to this approach.
Copyright notice: This is an original article by the blog author and may not be reproduced without the author's permission.
A small bitmap algorithm for large-scale data processing