The logstash-forwarder source is organized around the following roles (modules), which are wired together as a pipeline (a minimal end-to-end sketch follows the list):
Prospector: finds the files matching the configured paths/globs, starts harvesters, and hands each file off to a harvester.
Harvester: reads the discovered file and submits each line as an event to the spooler.
Spooler: acts as a buffer pool; when the buffer reaches the configured size or the idle timer fires, it flushes the buffered events to the publisher.
Publisher: connects over the network (the connection is authenticated via SSL), transfers the event data to the configured destination, and notifies the registrar when a transfer succeeds.
Registrar: records per-file state, including the offset the log has currently been read to, and stores it in the .logstash-forwarder file.
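To make the data flow between these roles concrete, here is a minimal, hypothetical wiring sketch using Go channels. The FileEvent struct and the stand-in goroutines are simplified assumptions for illustration and do not match the real logstash-forwarder signatures.

package main

import (
	"fmt"
	"time"
)

// FileEvent is a simplified stand-in for the forwarder's event struct
// (the real one also carries Line, Fields and file info).
type FileEvent struct {
	Source string
	Offset int64
	Text   string
}

func main() {
	events := make(chan *FileEvent, 16)   // harvesters -> spooler
	batches := make(chan []*FileEvent, 1) // spooler -> publisher
	acked := make(chan []*FileEvent, 1)   // publisher -> registrar

	// Spooler stand-in: one event per batch; the real spooler batches by
	// size or idle timeout (see the Spooler section below).
	go func() {
		for ev := range events {
			batches <- []*FileEvent{ev}
		}
	}()

	// Publisher stand-in: "ship" a batch (print it), then hand it to the registrar.
	go func() {
		for batch := range batches {
			fmt.Printf("published %d event(s)\n", len(batch))
			acked <- batch
		}
	}()

	// Registrar stand-in: remember the next read offset per source.
	go func() {
		state := map[string]int64{}
		for batch := range acked {
			for _, ev := range batch {
				state[ev.Source] = ev.Offset + int64(len(ev.Text)) + 1
			}
			fmt.Println("registry:", state)
		}
	}()

	// Harvester stand-in: feed a few fake events into the pipeline.
	for i := 0; i < 3; i++ {
		events <- &FileEvent{Source: "/var/log/app.log", Offset: int64(i * 10), Text: "line"}
	}
	close(events)
	time.Sleep(100 * time.Millisecond) // let the pipeline drain before exiting
}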
Walkthrough of the main source code:
Prospector------------
The key function is func (p *Prospector) scan(path string, output chan *FileEvent, resume *ProspectorResume), which walks the matched files and decides whether to harvest each one:

if fileinfo.ModTime().Before(p.lastscan) && time.Since(fileinfo.ModTime()) > p.FileConfig.DeadTime {
	var offset int64 = 0
	var is_resuming bool = false

	if resume != nil {
		// Call the calculator - it will process resume state if there is one
		offset, is_resuming = p.calculate_resume(file, fileinfo, resume)
	}

	// Are we resuming a dead file? We have to resume even if dead so we catch any old updates to the file.
	// This is safe as the harvester, once it hits the EOF and a timeout, will stop harvesting.
	// Once we detect changes again we can resume another harvester again - this keeps the number of goroutines to a minimum.
	if is_resuming {
		emit("Resuming harvester on a previously harvested file: %s\n", file)
		harvester := &Harvester{Path: file, FileConfig: p.FileConfig, Offset: offset, FinishChan: newinfo.harvester}
		go harvester.Harvest(output)
	} else {
		// Old file, skip it, but push offset of file size so we start from the end if this file changes and needs picking up
		emit("Skipping file (older than dead time of %v): %s\n", p.FileConfig.DeadTime, file)
		newinfo.harvester <- fileinfo.Size()
	}
}

The main execution path ends at go harvester.Harvest(output), which starts a harvester goroutine for the file. A small sketch of this scan decision follows.
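As a rough illustration of the decision above, here is a self-contained sketch that globs a path and applies a dead-time check. The glob pattern, dead time, and the name scanOnce are assumptions; the real scan() additionally resumes from saved offsets and tracks renames via inode/device.

package main

import (
	"fmt"
	"os"
	"path/filepath"
	"time"
)

// scanOnce applies the prospector-style dead-time check to every file
// matching the glob and reports what it would do with each one.
func scanOnce(glob string, deadTime time.Duration) {
	paths, err := filepath.Glob(glob)
	if err != nil {
		return
	}
	for _, path := range paths {
		info, err := os.Stat(path)
		if err != nil {
			continue
		}
		if time.Since(info.ModTime()) > deadTime {
			// Older than the dead time: remember the current size as the
			// resume offset, but do not start a harvester.
			fmt.Printf("skipping %s (last modified %v ago)\n", path, time.Since(info.ModTime()))
			continue
		}
		// A fresh or recently updated file: the real prospector starts a
		// goroutine here with `go harvester.Harvest(output)`.
		fmt.Printf("would start a harvester for %s\n", path)
	}
}

func main() {
	scanOnce("/var/log/*.log", 24*time.Hour)
}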
Harvester------------
The main function is func (h *Harvester) Harvest(output chan *FileEvent), which tails the file line by line:

for {
	text, bytesread, err := h.readline(reader, buffer, read_timeout)

	if err != nil {
		if err == io.EOF {
			// Timed out waiting for data, got EOF.
			// Check to see if the file was truncated
			info, _ := h.file.Stat()
			if info.Size() < h.Offset {
				emit("File truncated, seeking to beginning: %s\n", h.Path)
				h.file.Seek(0, os.SEEK_SET)
				h.Offset = 0
			} else if age := time.Since(last_read_time); age > h.FileConfig.DeadTime {
				// If last_read_time was more than dead time, this file is probably dead. Stop watching it.
				emit("Stopping harvest of %s; last change was %v ago\n", h.Path, age)
				return
			}
			continue
		} else {
			emit("Unexpected state reading from %s; error: %s\n", h.Path, err)
			return
		}
	}
	last_read_time = time.Now()

	line++
	event := &FileEvent{
		Source:   &h.Path,
		Offset:   h.Offset,
		Line:     line,
		Text:     text,
		Fields:   &h.FileConfig.Fields,
		fileinfo: &info,
	}
	h.Offset += int64(bytesread)

	output <- event // ship the new event downstream
} /* forever */

The main execution path ends at output <- event, which ships each line to the spooler. A small tail-follow sketch in the same spirit follows.
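The following is a small, self-contained tail-follow loop in the same spirit as Harvest(): read complete lines, and on EOF either detect truncation or wait for more data. The path, the one-second poll interval, and the helper name tail are illustrative assumptions; the real harvester uses its own readline() with a read timeout and emits FileEvent structs.

package main

import (
	"bufio"
	"fmt"
	"io"
	"os"
	"time"
)

// tail follows a file, printing each complete line and handling EOF and
// truncation the way the harvester does.
func tail(path string) error {
	f, err := os.Open(path)
	if err != nil {
		return err
	}
	defer f.Close()

	reader := bufio.NewReader(f)
	var offset int64
	var pending string // holds a partial line until its newline arrives

	for {
		chunk, err := reader.ReadString('\n')
		pending += chunk

		if err == io.EOF {
			// No newline yet: check for truncation, otherwise wait for data.
			info, statErr := f.Stat()
			if statErr == nil && info.Size() < offset {
				f.Seek(0, io.SeekStart)
				reader.Reset(f)
				offset = 0
				pending = ""
			}
			time.Sleep(time.Second)
			continue
		}
		if err != nil {
			return err
		}

		offset += int64(len(pending))
		fmt.Print(pending) // the real harvester builds a *FileEvent here and sends it downstream
		pending = ""
	}
}

func main() {
	if err := tail("/var/log/app.log"); err != nil {
		fmt.Println("harvest stopped:", err)
	}
}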
Spooler ------------
func Spool(input chan *FileEvent,
	output chan []*FileEvent,
	max_size uint64,
	idle_timeout time.Duration) {
	// Heartbeat periodically. If the last flush was longer than
	// 'idle_timeout' time ago, then we'll force a flush to prevent us from
	// holding on to spooled events for too long.
	ticker := time.NewTicker(idle_timeout / 2)

	// Slice for spooling into
	// TODO(sissel): use container.Ring?
	spool := make([]*FileEvent, max_size)

	// Current write position in the spool
	var spool_i int = 0

	next_flush_time := time.Now().Add(idle_timeout)
	for {
		select {
		case event := <-input:
			//append(spool, event)
			spool[spool_i] = event
			spool_i++

			// Flush if full
			if spool_i == cap(spool) {
				//spoolcopy := make([]*FileEvent, max_size)
				var spoolcopy []*FileEvent
				//fmt.Println(spool[0])
				spoolcopy = append(spoolcopy, spool[:]...)
				output <- spoolcopy
				next_flush_time = time.Now().Add(idle_timeout)

				spool_i = 0
			}
		case <-ticker.C:
			//fmt.Println("tick")
			if now := time.Now(); now.After(next_flush_time) {
				// If current time is after the next_flush_time, flush!
				//fmt.Printf("timeout: %d exceeded by %d\n", idle_timeout,
				//now.Sub(next_flush_time))

				// Flush what we have, if anything
				if spool_i > 0 {
					var spoolcopy []*FileEvent
					spoolcopy = append(spoolcopy, spool[0:spool_i]...)
					output <- spoolcopy
					next_flush_time = now.Add(idle_timeout)
					spool_i = 0
				}
			} /* if 'now' is after 'next_flush_time' */
			/* case ... */
		} /* select */
	} /* for */
} /* spool */

The spooler blocks waiting for input events and flushes whenever the spool reaches max_size or the idle timer expires; the main execution path ends at output <- spoolcopy. The same pattern is distilled into a small self-contained helper below.
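The same size-or-timeout batching pattern, distilled into a self-contained helper. The name batch, the string element type, and the shutdown handling on channel close are assumptions added for illustration; ticking at half the idle timeout, as the original does, bounds how late a timeout flush can be to at most half a period.

package main

import (
	"fmt"
	"time"
)

// batch collects items from in and emits slices on out whenever maxSize
// items have accumulated or idleTimeout has passed since the last flush.
func batch(in <-chan string, out chan<- []string, maxSize int, idleTimeout time.Duration) {
	ticker := time.NewTicker(idleTimeout / 2) // heartbeat at half the timeout
	defer ticker.Stop()

	spool := make([]string, 0, maxSize)
	nextFlush := time.Now().Add(idleTimeout)

	flush := func() {
		out <- spool
		spool = make([]string, 0, maxSize)
		nextFlush = time.Now().Add(idleTimeout)
	}

	for {
		select {
		case item, ok := <-in:
			if !ok {
				if len(spool) > 0 {
					flush()
				}
				close(out)
				return
			}
			spool = append(spool, item)
			if len(spool) == maxSize {
				flush()
			}
		case <-ticker.C:
			if time.Now().After(nextFlush) && len(spool) > 0 {
				flush()
			}
		}
	}
}

func main() {
	in := make(chan string)
	out := make(chan []string)
	go batch(in, out, 3, time.Second)

	go func() {
		for _, s := range []string{"a", "b", "c", "d"} {
			in <- s
		}
		close(in)
	}()

	for b := range out {
		fmt.Println("flushed:", b)
	}
}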
Publisher------------
for events := range input {
	buffer.Truncate(0)

	compressor, _ := zlib.NewWriterLevel(&buffer, 3)

	for _, event := range events {
		sequence += 1
		writeDataFrame(event, sequence, compressor)
	}
	compressor.Flush()
	compressor.Close()

	compressed_payload := buffer.Bytes()

	// Send buffer until we're successful...
	oops := func(err error) {
		// TODO(sissel): Track how frequently we timeout and reconnect. If we're
		// timing out too frequently, there's really no point in timing out since
		// basically everything is slow or down. We'll want to ratchet up the
		// timeout value slowly until things improve, then ratchet it down once
		// things seem healthy.
		emit("Socket error, will reconnect: %s\n", err)
		time.Sleep(1 * time.Second)
		socket.Close()
		socket = connect(config)
	}

SendPayload:
	for {
		// Abort if our whole request takes longer than the configured
		// network timeout.
		socket.SetDeadline(time.Now().Add(config.timeout))

		// Set the window size to the length of this payload in events.
		_, err = socket.Write([]byte("1W"))
		if err != nil {
			oops(err)
			continue
		}
		binary.Write(socket, binary.BigEndian, uint32(len(events)))
		if err != nil {
			oops(err)
			continue
		}

		// Write compressed frame
		socket.Write([]byte("1C"))
		if err != nil {
			oops(err)
			continue
		}
		binary.Write(socket, binary.BigEndian, uint32(len(compressed_payload)))
		if err != nil {
			oops(err)
			continue
		}
		_, err = socket.Write(compressed_payload)
		if err != nil {
			oops(err)
			continue
		}

		// Read ack
		response := make([]byte, 0, 6)
		ackbytes := 0
		for ackbytes != 6 {
			n, err := socket.Read(response[len(response):cap(response)])
			if err != nil {
				emit("Read error looking for ack: %s\n", err)
				socket.Close()
				socket = connect(config)
				continue SendPayload // retry sending on new connection
			} else {
				ackbytes += n
			}
		}

		// TODO(sissel): verify ack
		// Success, stop trying to send the payload.
		break
	}

	// Tell the registrar that we've successfully sent these events
	registrar <- events
} /* for each event payload */

The publisher compresses each batch, writes it over the SSL connection (reconnecting on any socket error), waits for the acknowledgement, and only then hands the batch to the registrar; the main execution path ends at registrar <- events. A sketch of the frame layout follows.
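Based on the frames written above (a "1W" window frame carrying the event count, then a "1C" frame carrying the zlib-compressed payload length and the payload itself), here is a sketch that builds the same byte layout into a buffer. It is an illustration only: the payload bytes stand in for the serialized events, and a real publisher writes these frames to the SSL socket, waits for the 6-byte acknowledgement, and reconnects on any error.

package main

import (
	"bytes"
	"compress/zlib"
	"encoding/binary"
	"fmt"
)

// buildFrames assembles a window frame and a compressed data frame for one
// batch of events, mirroring the wire layout shown in the publisher loop.
func buildFrames(eventCount uint32, payload []byte) ([]byte, error) {
	// Compress the payload as the publisher does (zlib level 3).
	var compressed bytes.Buffer
	w, err := zlib.NewWriterLevel(&compressed, 3)
	if err != nil {
		return nil, err
	}
	if _, err := w.Write(payload); err != nil {
		return nil, err
	}
	if err := w.Close(); err != nil {
		return nil, err
	}

	var out bytes.Buffer
	// Window frame: how many events the receiver should acknowledge.
	out.WriteString("1W")
	binary.Write(&out, binary.BigEndian, eventCount)
	// Compressed frame: length prefix followed by the compressed bytes.
	out.WriteString("1C")
	binary.Write(&out, binary.BigEndian, uint32(compressed.Len()))
	out.Write(compressed.Bytes())
	return out.Bytes(), nil
}

func main() {
	frames, err := buildFrames(2, []byte(`{"line":"example"}`))
	if err != nil {
		panic(err)
	}
	fmt.Printf("wire payload: % x\n", frames)
}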
Registrar ------------
func Registrar(state map[string]*FileState, input chan []*FileEvent) {
	for events := range input {
		emit("Registrar: processing %d events\n", len(events))
		// Take the last event found for each file source
		for _, event := range events {
			// Skip stdin
			if *event.Source == "-" {
				continue
			}

			ino, dev := file_ids(event.fileinfo)
			state[*event.Source] = &FileState{
				Source: event.Source,
				// Take the offset + length of the line + newline char and
				// save it as the new starting offset.
				// This causes a problem if the EOL is a CRLF! Then on start it reads the LF again and generates an event with an empty line.
				Offset: event.Offset + int64(len(*event.Text)) + 1, // REVU: this is begging for BUGs
				Inode:  ino,
				Device: dev,
			}
			//log.Printf("State %s: %d\n", *event.Source, event.Offset)
		}

		if e := writeRegistry(state, ".logstash-forwarder"); e != nil {
			// REVU: but we should panic, or something, right?
			emit("WARNING: (continuing) update of registry returned error: %s", e)
		}
	}
}

func writeRegistry(state map[string]*FileState, path string) error {
	tempfile := path + ".new"
	file, e := os.Create(tempfile)
	if e != nil {
		emit("Failed to create tempfile (%s) for writing: %s\n", tempfile, e)
		return e
	}
	defer file.Close()

	encoder := json.NewEncoder(file)
	encoder.Encode(state)

	return onRegistryWrite(path, tempfile)
}

Finally, the state is written as JSON to the .logstash-forwarder file, recording the offset up to which each log has been read. A sketch of the temp-file-then-rename pattern follows.
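A minimal sketch of the write-to-temp-file-then-rename pattern implied by writeRegistry/onRegistryWrite. The FileState fields shown are simplified (the real struct also records inode and device numbers), and saveRegistry is a hypothetical helper name used only for this illustration.

package main

import (
	"encoding/json"
	"fmt"
	"os"
)

// FileState is a simplified version of the state the registrar persists.
type FileState struct {
	Source string `json:"source"`
	Offset int64  `json:"offset"`
}

// saveRegistry encodes the state as JSON into path+".new" and then renames
// it over the registry file, so a crash mid-write never leaves a truncated
// registry behind.
func saveRegistry(state map[string]*FileState, path string) error {
	tempfile := path + ".new"
	f, err := os.Create(tempfile)
	if err != nil {
		return err
	}
	if err := json.NewEncoder(f).Encode(state); err != nil {
		f.Close()
		return err
	}
	if err := f.Close(); err != nil {
		return err
	}
	return os.Rename(tempfile, path)
}

func main() {
	state := map[string]*FileState{
		"/var/log/app.log": {Source: "/var/log/app.log", Offset: 1234},
	}
	if err := saveRegistry(state, ".logstash-forwarder"); err != nil {
		fmt.Println("registry write failed:", err)
	}
}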