The code is as follows:
Package Com.wy.flume.interceptor;import Java.util.list;import Java.util.map;import java.util.regex.matcher;import Java.util.regex.pattern;import Org.apache.commons.lang.stringutils;import Org.apache.flume.context;import Org.apache.flume.event;import Org.apache.flume.interceptor.interceptor;import Org.apache.flume.interceptor.regexextractorinterceptorpassthroughserializer;import Org.apache.flume.interceptor.regexextractorinterceptorserializer;import Org.slf4j.logger;import Org.slf4j.loggerfactory;import Com.google.common.base.charsets;import com.google.common.base.Preconditions; Import Com.google.common.base.throwables;import Com.google.common.collect.lists;public class Regexextractorheaderinterceptor implements Interceptor {static final String regex = "regex"; Static final String serializers = "serializers"; Static final String Extractor_header = "Extractorheader"; Static Final Boolean default_extractor_header = false; Static final String Extractor_header_key = "Extractorheaderkey"; private static final Logger Logger = loggerfactory. GetLogger (Regexextractorheaderinterceptor.class); Private final Pattern regex; Private final list<nameandserializer> serializers; Private Final Boolean Extractorheader; Private final String Extractorheaderkey; Private Regexextractorheaderinterceptor (Pattern regex, list<nameandserializer> Serializers,boolean ExtractorH Eader, String extractorheaderkey) {This.regex = regex; This.serializers = serializers; This.extractorheader = Extractorheader; This.extractorheaderkey = Extractorheaderkey; } @Override public void Initialize () {//No-op ... } @Override public void Close () {//No-op ... } @Override public Event intercept (event event) {String extractorheaderval; if (extractorheader) {extractorheaderval = Event.getheaders (). 
get (Extractorheaderkey); }else{extractorheaderval = new String (Event.getbody (), charsets.utf_8); } Matcher Matcher = Regex.matcher (extractorheaderval); map<string, string> headers = Event.getheaders (); if (Matcher.find ()) {for (int group = 0, Count = Matcher.groupcount (); group < count; group++) {int g Roupindex = group + 1; if (Groupindex > Serializers.size ()) {if (logger.isdebugenabled ()) {Logger.debug ("skipping GR OUP {} to {} Due to missing serializer ", group, Count); } break; } Nameandserializer Serializer = Serializers.get (group); if (logger.isdebugenabled ()) {Logger.debug ("serializing {} using {}", Serializer.headername, SE Rializer.serializer); } headers.put (Serializer.headername, Serializer.serializer.serialize (Matcher.group (Groupindex))); }} returnEvent } @Override public list<event> intercept (list<event> events) {list<event> intercepted = List S.newarraylistwithcapacity (Events.size ()); For (event event:events) {Event interceptedevent = intercept (event); if (interceptedevent! = null) {Intercepted.add (interceptedevent); }} return intercepted; } public static class Builder implements Interceptor.builder {private Pattern regex; Private list<nameandserializer> serializerlist; Private Boolean Extractorheader; Private String Extractorheaderkey; Private final Regexextractorinterceptorpassthroughserializer Defaultserializer = new Regexextractorinterceptorpassthroughserializer (); @Override public void Configure (context context) {String regexstring = context.getstring (REGEX); Preconditions.checkargument (! Stringutils.isempty (regexstring), "must supply a valid regex StRing "); Regex = Pattern.compile (regexstring); Regex.pattern (); Regex.matcher (""). GroupCount (); Configureserializers (context); Extractorheader = Context.getboolean (Extractor_header,default_extractor_header); if (extractorheader) {Extractorheaderkey = context.getstring (Extractor_header_key); Preconditions.checkargument (! 
Stringutils.isempty (Extractorheaderkey), "header key must"); }} private void Configureserializers (context context) {String serializerliststr = context.getst Ring (serializers); Preconditions.checkargument (! Stringutils.isempty (SERIALIZERLISTSTR), "must supply at least one name and serializer"); string[] Serializernames = Serializerliststr.split ("\\s+"); Context serializercontexts = new Context (Context.getsubproperties (serializers + ".")); Serializerlist = lists.newarraylistwithcapacity (seriaLizernames.length); for (String serializername:serializernames) {Context serializercontext = new Context (Serializerco Ntexts.getsubproperties (Serializername + ".")); String type = serializercontext.getstring ("type", "DEFAULT"); String name = serializercontext.getstring ("name"); Preconditions.checkargument (! Stringutils.isempty (name), "supplied name cannot be empty."); if ("DEFAULT". Equals (Type)) {Serializerlist.add (new Nameandserializer (name, Defaultserializer)); } else {Serializerlist.add (new Nameandserializer (Name, Getcustomserializer (type, Serializercon text)); }}} private Regexextractorinterceptorserializer Getcustomserializer (String clazzname, Context Context) {try {Regexextractorinterceptorserializer serializer = (Regexextractorinterceptorserializer) C Lass. forname (clazzname). newinstance(); Serializer.configure (context); return serializer; } catch (Exception e) {logger.error ("Could not instantiate event serializer.", e); Throwables.propagate (e); } return Defaultserializer; } @Override Public Interceptor Build () {preconditions.checkargument (regex! 
= NULL, "Regex Pat Tern was misconfigured "); Preconditions.checkargument (serializerlist.size () > 0, "must supply a valid group match ID list"); return new Regexextractorheaderinterceptor (regex, Serializerlist, Extractorheader, Extractorheaderkey); }} static class Nameandserializer {private final String headername; Private final Regexextractorinterceptorserializer serializer; Public Nameandserializer (String headername, Regexextractorinterceptorserializer serializer) {This.headern ame = Headername; This.serializer = serializer; } } }
Application configuration:
hdp2.sources.s1.interceptors = i2
hdp2.sources.s1.interceptors.i2.type = com.wy.flume.interceptor.RegexExtractorHeaderInterceptor$Builder
hdp2.sources.s1.interceptors.i2.regex = ([^_]+)_(\\d{8}).*
hdp2.sources.s1.interceptors.i2.extractorHeader = true
hdp2.sources.s1.interceptors.i2.extractorHeaderKey = basename
hdp2.sources.s1.interceptors.i2.serializers = s1 s2
hdp2.sources.s1.interceptors.i2.serializers.s1.name = log_type
hdp2.sources.s1.interceptors.i2.serializers.s2.name = log_day
Flume NG custom interceptor: extracting additional headers by regex-matching a field that is already present in the event headers