Programming-interview books contain many classic questions. They are worth studying carefully and are very helpful when preparing for an interview.
The following is a question from page 1: write a function that removes the comments from C/C++ program code. I could not understand the solution at first. Later I found a detailed analysis of the code that someone had posted on the Internet, but after reading the code myself I thought that analysis contained a lot of errors. So I spent more than half a day working through it until I believed I understood it. My own analysis follows. If any part of it is still not rigorous enough, please point it out!
/********************************************************
Function: removes comments from C/C++ source code.
Input: a pointer to the C/C++ program text, and its length.
Source: programmer interview book, page 45.
Analysis: read one line at a time and handle two cases, because there are two kinds of comment:
(1) Search the line for "//". If it is found, discard the "//" and everything after it.
(2) Search the line for a block-comment opener and record its position pos1, then look for the matching closer on the same line. If it is found, record its position pos2, discard everything from pos1 to pos2, and continue searching after pos2 for another opener. If the closer is not on the current line, discard the opener and everything after it on that line, then read a new line and look for the closer there; while it is not found, discard the whole line and read another. Once the closer is found, record its position pos2 and discard the characters from 0 to pos2.
(3) Repeat steps 1 and 2 until the end of the program.
Special cases to handle:
- "//" or a block-comment opener inside a double-quoted string literal
- "//" or a block-comment opener inside a single-quoted character constant
- nesting between the two comment styles: a block-comment opener inside a "//" comment, and "//" inside a block comment
*********************************************************/
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>
/*
 * remove_comment - blank out C and C++ comments in a source buffer, in place.
 *
 * buf:  start of the source text; it does not have to be NUL-terminated.
 * size: number of bytes of buf to scan.
 *
 * Every byte that belongs to a comment, delimiters included, is overwritten
 * with a space, so the buffer keeps its length and the surviving code keeps
 * its column positions.
 *
 *  - Line comment: blanked from the "//" up to, but not including, the line
 *    terminator ("\n" or "\r\n").  If the buffer ends before a newline, the
 *    comment is blanked through the end of the buffer.
 *  - Block comment: blanked from its opener through its closer, including
 *    any newlines in between.  A block comment that is never closed is left
 *    untouched.
 *  - Comment markers inside string literals and character constants are
 *    ordinary text.  A backslash inside a literal escapes the next byte, so
 *    an escaped quote does not end the literal and an escaped backslash
 *    does not hide the closing quote.
 *
 * No byte outside [buf, buf + size) is read or written.
 */
void remove_comment(char *buf, size_t size)
{
    enum {
        IN_CODE,          /* ordinary program text */
        IN_CHAR_LITERAL,  /* between single quotes */
        IN_STRING,        /* between double quotes */
        IN_LINE_COMMENT,  /* after "//", before the newline */
        IN_BLOCK_COMMENT  /* after the block opener, before its closer */
    } state = IN_CODE;
    char *p;
    char *end;
    char *comment_start = NULL; /* first delimiter byte of the open comment */

    if (buf == NULL || size == 0) {
        return;
    }

    p = buf;
    end = buf + size;

    while (p < end) {
        const char c = *p;
        /* Look-ahead byte; 0 when p is the last byte so we never read past end. */
        const char next = (p + 1 < end) ? p[1] : '\0';

        switch (state) {
        case IN_CODE:
            if (c == '\'') {
                state = IN_CHAR_LITERAL;
                p++;
            } else if (c == '"') {
                state = IN_STRING;
                p++;
            } else if (c == '/' && next == '/') {
                comment_start = p;
                state = IN_LINE_COMMENT;
                p += 2;
            } else if (c == '/' && next == '*') {
                comment_start = p;
                state = IN_BLOCK_COMMENT;
                /* Skip both opener bytes so that "slash star slash" is not
                 * mistaken for an opener immediately followed by a closer. */
                p += 2;
            } else {
                p++;
            }
            break;

        case IN_CHAR_LITERAL:
        case IN_STRING:
            if (c == '\\') {
                /* Skip the escaped byte together with its backslash. */
                p += (p + 1 < end) ? 2 : 1;
            } else {
                if (c == (state == IN_STRING ? '"' : '\'')) {
                    state = IN_CODE;
                }
                p++;
            }
            break;

        case IN_LINE_COMMENT:
            if (c == '\n') {
                /* Keep the line terminator; p[-1] is valid because p is at
                 * least two bytes past comment_start. */
                char *stop = (p[-1] == '\r') ? p - 1 : p;

                memset(comment_start, ' ', (size_t)(stop - comment_start));
                comment_start = NULL;
                state = IN_CODE;
            }
            p++;
            break;

        case IN_BLOCK_COMMENT:
            if (c == '*' && next == '/') {
                p += 2;
                memset(comment_start, ' ', (size_t)(p - comment_start));
                comment_start = NULL;
                state = IN_CODE;
            } else {
                p++;
            }
            break;
        }
    }

    /* A line comment on the last line may have no newline after it. */
    if (state == IN_LINE_COMMENT) {
        memset(comment_start, ' ', (size_t)(end - comment_start));
    }
}
/*************************************** *******
Start of main Function
**************************************** *******/
/*
 * Strip the comments from the file named by argv[1] and write the result to
 * standard output.
 *
 * At most sizeof(buf) bytes of the file are processed; anything beyond that
 * is ignored.
 *
 * Returns 0 on success, and -1 on a usage error, when the file cannot be
 * opened or read, or when it is empty.
 */
int main(int argc, char *argv[])
{
    static char buf[102400];
    size_t len = 0;
    ssize_t n;
    int fd;

    if (argc != 2) {
        fprintf(stderr, "usage: %s <file>\n",
                argc > 0 ? argv[0] : "remove_comment");
        return -1; /* do not fall through and open a NULL path */
    }

    fd = open(argv[1], O_RDONLY); /* read-only open */
    if (fd == -1) {
        perror(argv[1]);
        return -1;
    }

    /* read() may return fewer bytes than requested, so keep reading until
     * end of file or until the buffer is full. */
    while (len < sizeof(buf)) {
        n = read(fd, buf + len, sizeof(buf) - len);
        if (n == -1) {
            perror(argv[1]);
            close(fd);
            return -1;
        }
        if (n == 0) {
            break;
        }
        len += (size_t)n;
    }
    close(fd);

    if (len == 0) {
        return -1;
    }

    remove_comment(buf, len);

    /* Write by length: no terminator byte is needed past the end of buf, and
     * the output is not cut short at an embedded NUL. */
    if (fwrite(buf, 1, len, stdout) != len) {
        return -1;
    }
    return 0;
}