程式人生 2010-02-25 15:55:04 閱讀335 評論0 字型大小:大中小 訂閱
接著前面的分析,先看m_decoder->decode(str, len);
// Decodes one chunk of raw resource bytes (`data`, `len` bytes long) into a String.
//
// Before decoding, the pending encoding checks are run: the BOM check, the CSS
// charset check (CSS content only), the head charset check (HTML/XML content
// only) and, for Japanese default encodings, Japanese auto-detection.
// Returns an empty string when one of the charset checks returns false;
// otherwise returns whatever m_decoder.decode() produces.
String TextResourceDecoder::decode(const char* data, size_t len)
{
    // Run the byte-order-mark check only while it is still pending
    // (original note: checks whether the data is Unicode-encoded).
    if (!m_checkedForBOM) {
        checkForBOM(data, len);
    }

    // Handed to the charset checks below; presumably set by them when they have
    // already copied `data` into m_buffer -- it suppresses the copy further down.
    bool alreadyBuffered = false;

    // For CSS content, check the CSS character set.
    const bool cssCharsetPending = m_contentType == CSS && !m_checkedForCSSCharset;
    if (cssCharsetPending && !checkForCSSCharset(data, len, alreadyBuffered)) {
        return "";
    }

    // HTML and XML: check the character set of the HTML/XML content.
    const bool headCharsetPending = (m_contentType == HTML || m_contentType == XML) && !m_checkedForHeadCharset;
    if (headCharsetPending && !checkForHeadCharset(data, len, alreadyBuffered)) {
        return "";
    }

    // Do the auto-detect if our default encoding is one of the Japanese ones.
    // FIXME: It seems wrong to change our encoding downstream after we have already done some decoding.
    // (Original author's aside: why is there no equivalent check for Chinese encodings?)
    const bool encodingAlreadyPinned = m_source == UserChosenEncoding || m_source == AutoDetectedEncoding;
    if (!encodingAlreadyPinned && encoding().isJapanese()) {
        detectJapaneseEncoding(data, len);
    }

    ASSERT(encoding().isValid());

    const bool isXML = m_contentType == XML;

    if (!m_buffer.isEmpty()) {
        // Earlier bytes are still held in m_buffer: append this chunk (unless a
        // charset check already did), decode the whole buffer, then empty it.
        if (!alreadyBuffered) {
            const size_t previousSize = m_buffer.size();
            m_buffer.grow(previousSize + len);
            memcpy(m_buffer.data() + previousSize, data, len);
        }
        String decoded = m_decoder.decode(m_buffer.data(), m_buffer.size(), false, isXML, m_sawError);
        m_buffer.clear();
        return decoded;
    }

    // Nothing buffered: decode the caller's bytes directly.
    return m_decoder.decode(data, len, false, isXML, m_sawError);
}
再回到tokenizer->write(decoded, true);看其具體實現:
bool HTMLTokenizer::write(const SegmentedString& str, bool appendData)
{
if (!m_buffer)
return false;
if (m_parserStopped)
return false;
SegmentedString source(str);
if (m_executingScript)
source.setExcludeLineNumbers();
if ((m_executingScript && appendData) || !m_pendingScripts.isEmpty()) {
// don't parse; we will do this later
if (m_currentPrependingSrc)
m_currentPrependingSrc->append(source);
else {
m_pendingSrc.append(source);
#if PRELOAD_SCANNER_ENABLED
if (m_preloadScanner && m_preloadScanner->inProgress() && appendData)
m_preloadScanner->write(source);
#endif
}
return false;
}
#if PRELOAD_SCANNER_ENABLED
if (m_preloadScanner && m_preloadScanner->inProgress() && appendData)
m_preloadScanner->end();
#endif
if (!m_src.isEmpty())
m_src.append(source);
else
setSrc(source);
// Once a timer is set, it has control of when the tokenizer continues.
if (m_timer.isActive())
return false;
bool wasInWrite = m_inWrite;
m_inWrite = true;
#ifdef INSTRUMENT_LAYOUT_SCHEDULING
if (!m_doc->ownerElement())
printf("Beginning write at time %d ", m_doc->elapsedTime());
#endif
int processedCount = 0;
double startTime = currentTime();
Frame* frame = m_doc->frame();
State state = m_state;
while (!m_src.isEmpty() && (!frame || !frame->loader()->isScheduledLocationChangePending())) {
if (!continueProcessing(processedCount, startTime, state))
break;
// do we need to enlarge the buffer?
checkBuffer();
UChar cc = *m_src;
bool wasSkipLF = state.skipLF();
if (wasSkipLF)
state.setSkipLF(false);
if (wasSkipLF && (cc == ' '))
m_src.advance();
else if (state.needsSpecialWriteHandling()) {
// it's important to keep needsSpecialWriteHandling with the flags this block tests
if (state.hasEntityState())
state = parseEntity(m_src, m_dest, state, m_cBufferPos, false, state.hasTagState());
else if (state.inPlainText())
state = parseText(m_src, state);
else if (state.inAnySpecial())
state = parseSpecial(m_src, state);
else if (state.inComment())
state = parseComment(m_src, state);
else if (state.inDoctype())
state = parseDoctype(m_src, state);
else if (state.inServer())
state = parseServer(m_src, state);
else if (state.inProcessingInstruction())
state = parseProcessingInstruction(m_src, state);
else if (state.hasTagState())
state = parseTag(m_src, state);
else if (state.startTag()) {
state.setStartTag(false);
switch(cc) {
case '/':
break;
case '!': {
// or
searchCount = 1; // Look for ' m_doctypeSearchCount = 1;
break;
}
case '?': {
// xml processing instruction
state.setInProcessingInstruction(true);
tquote = NoQuote;
state = parseProcessingInstruction(m_src, state);
continue;
break;
}
case '%':
if (!m_brokenServer) {
// <% server stuff, handle as comment %>
state.setInServer(true);
tquote = NoQuote;
state = parseServer(m_src, state);
continue;
}
// else fall through
default: {
if( ((cc >= 'a') && (cc <= 'z')) || ((cc >= 'A') && (cc <= 'Z'))) {
// Start of a Start-Tag
} else {
// Invalid tag
// Add as is
*m_dest = '<';
m_dest++;
continue;
}
}
}; // end case
processToken();
m_cBufferPos = 0;
state.setTagState(TagName);
state = parseTag(m_src, state);
}
} else if (cc == '&' && !m_src.escaped()) {
m_src.advancePastNonNewline();
state = parseEntity(m_src, m_dest, state, m_cBufferPos, true, state.hasTagState());
} else if (cc == '<' && !m_src.escaped()) {
m_currentTagStartLineNumber = m_lineNumber;
m_src.advancePastNonNewline();
state.setStartTag(true);
state.setDiscardLF(false);
} else if (cc == ' ' || cc == ' ') {
if (state.discardLF())
// Ignore this LF
state.setDiscardLF(false); // We have discarded 1 LF
else {
// Process this LF
*m_dest++ = ' ';
if (cc == ' ' && !m_src.excludeLineNumbers())
m_lineNumber++;
}
/* Check for MS-DOS CRLF sequence */
if (cc == ' ')
state.setSkipLF(true);
m_src.advance(m_lineNumber);
} else {
state.setDiscardLF(false);
*m_dest++ = cc;
m_src.advancePastNonNewline();
}
}
#ifdef INSTRUMENT_LAYOUT_SCHEDULING
if (!m_doc->ownerElement())
printf("Ending write at time %d ", m_doc->elapsedTime());
#endif
m_inWrite = wasInWrite;
m_state = state;
if (m_noMoreData && !m_inWrite && !state.loadingExtScript() && !m_executingScript && !m_timer.isActive()) {
end(); // this actually causes us to be deleted
return true;
}
return false;
}
在調用的時候,因為調用參數decoded是String類型,所以會先隱式轉換成SegmentedString。SegmentedString可以附帶行號,也可以不帶行號(可以設定)。上面程式中的while迴圈主體,就是剖析器的主體。