大数据组件-Flume自定义拦截器,高阶自定义组件
版本统一:jdk:1.8hadoop:2.5.7zk:flume:目录标题1.自定义拦截器(1)配置maven依赖(2)java编写实现的整体概览(3)代码实现模板(4)打jar包,上传jar包到flume的lib目录(5)设置自定义flume的配置文件(6)启动fluem开始监听Flume高阶自定义组件1.自定义拦截器flume提供了java的API接口,我们可以通过java来实现自定义拦截器(
·
版本统一:
jdk:1.8
hadoop:2.5.7
zk:
flume:
目录标题
1.自定义拦截器
flume提供了java的API接口,我们可以通过java来实现自定义拦截器
(1)配置maven依赖
<dependencies>
<dependency>
<groupId>org.apache.flume</groupId>
<artifactId>flume-ng-sdk</artifactId>
<version>1.8.0</version>
</dependency>
<dependency>
<groupId>org.apache.flume</groupId>
<artifactId>flume-ng-core</artifactId>
<version>1.8.0</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.0</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
<encoding>UTF-8</encoding>
<!-- <verbal>true</verbal>-->
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>3.1.1</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
<transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass></mainClass>
</transformer>
</transformers>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
(2)java编写实现的整体概览
首先先简单的说以下思路,在使用自定义构造时也就是说Builder这个CustomParameterInterceptor类的内部类实现的功能就是,通过flume content上下环境,获取自定义配置的参数,通过其中的build方法传递给CustomParameterInterceptor这个外部类的有参构造方法.当进行拦截时会调用外部类的intercept方法,他就是具体实现拦截的方法.
(3)代码实现模板
你可以在idea中创建一个名字叫 CustomParameterInterceptor类将下面内容copy到idea环境中
package cn.itcast.interceptor;
import com.google.common.base.Charsets;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.interceptor.Interceptor;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static cn.itcast.interceptor.CustomParameterInterceptor.Constants.*;
/**
* Created by itcast
*/
public class CustomParameterInterceptor implements Interceptor{
/** The field_separator.指明每一行字段的分隔符 */
private final String fields_separator;
/** The indexs.通过分隔符分割后,指明需要那列的字段 下标*/
private final String indexs;
/** The indexs_separator. 多个下标的分隔符*/
private final String indexs_separator;
/** The encrypted_field_index. 需要加密的字段下标*/
private final String encrypted_field_index;
/**
* todo3 通过CustomParameterInterceptor的构造器(有参构造)获取到配置文件中的参数
*/
public CustomParameterInterceptor( String fields_separator,
String indexs, String indexs_separator,String encrypted_field_index) {
String f = fields_separator.trim(); //字段做一个去空操作
String i = indexs_separator.trim();
this.indexs = indexs;
this.encrypted_field_index=encrypted_field_index.trim();
if (!f.equals("")) {
f = UnicodeToString(f);
}
this.fields_separator =f;
if (!i.equals("")) {
i = UnicodeToString(i);
}
this.indexs_separator = i;
}
/*
*
* \t 制表符 ('\u0009')
*
*/
public static String UnicodeToString(String str) {
Pattern pattern = Pattern.compile("(\\\\u(\\p{XDigit}{4}))");
Matcher matcher = pattern.matcher(str);
char ch;
while (matcher.find()) {
ch = (char) Integer.parseInt(matcher.group(2), 16);
str = str.replace(matcher.group(1), ch + "");
}
return str;
}
/*
* @see org.apache.flume.interceptor.Interceptor#intercept(org.apache.flume.Event)
* todo 该方法就是自定义具体功能实现的所在,
*/
public Event intercept(Event event) {
if (event == null) {
return null;
}
try {
//todo 通过getBody()方法将event数据类型转换成为字节数组,在通过字节数据加编码类型构造出一个字符串
String line = new String(event.getBody(), Charsets.UTF_8);
String[] fields_spilts = line.split(fields_separator);
String[] indexs_split = indexs.split(indexs_separator); //[0,1,3,5,6]
String newLine="";
//todo 遍历需要保留下来的下标数组0,1,3,5,6,我们拿数组的索引去取得数据
for (int i = 0; i < indexs_split.length; i++) {
int parseInt = Integer.parseInt(indexs_split[i]);//字符串索引转换为数字索引
//todo 对加密字段进行加密
if(!"".equals(encrypted_field_index)&&encrypted_field_index.equals(indexs_split[i])){
newLine+=StringUtils.GetMD5Code(fields_spilts[parseInt]);
}else{
newLine+=fields_spilts[parseInt];
}
//todo 如果不是最后一个,就是字符串加分隔符,如果是最后一个就直接字符串
if(i!=indexs_split.length-1){
newLine+=fields_separator;
}
}
event.setBody(newLine.getBytes(Charsets.UTF_8));
return event;
} catch (Exception e) {
return event;
}
}
/*
* @see org.apache.flume.interceptor.Interceptor#intercept(java.util.List)
* todo event批处理方法,最终还是调用的intercept(event)
*/
public List<Event> intercept(List<Event> events) {
List<Event> out = new ArrayList<Event>();
for (Event event : events) {
Event outEvent = intercept(event);
if (outEvent != null) {
out.add(outEvent);
}
}
return out;
}
/*
* @see org.apache.flume.interceptor.Interceptor#initialize()
*/
public void initialize() {
// TODO Auto-generated method stub
}
/*
* @see org.apache.flume.interceptor.Interceptor#close()
*/
public void close() {
// TODO Auto-generated method stub
}
public static class Builder implements Interceptor.Builder {
/** The fields_separator.指明每一行字段的分隔符 */
private String fields_separator;
/** The indexs.通过分隔符分割后,指明需要那列的字段 下标*/
private String indexs;
/** The indexs_separator. 多个下标下标的分隔符*/
private String indexs_separator;
/** The encrypted_field. 需要加密的字段下标*/
private String encrypted_field_index;
/*
* @see org.apache.flume.conf.Configurable#configure(org.apache.flume.Context)
* todo 1.通过flume context上下文环境变量读取采集方案中配置的属性,如果没有配置参数他会通过默认值进行实现
*/
public void configure(Context context) {
fields_separator = context.getString(FIELD_SEPARATOR, DEFAULT_FIELD_SEPARATOR);
indexs = context.getString(INDEXS, DEFAULT_INDEXS);
indexs_separator = context.getString(INDEXS_SEPARATOR, DEFAULT_INDEXS_SEPARATOR);
encrypted_field_index= context.getString(ENCRYPTED_FIELD_INDEX, DEFAULT_ENCRYPTED_FIELD_INDEX);
}
/*
* @see org.apache.flume.interceptor.Interceptor.Builder#build()
* todo 2.通过build方法,把解析的参数传递给自定义拦截器的实例对象
*/
public Interceptor build() {
return new CustomParameterInterceptor(fields_separator, indexs, indexs_separator,encrypted_field_index);
}
}
/**
* The Class Constants.
*
*/
public static class Constants {
/** The Constant FIELD_SEPARATOR. */
public static final String FIELD_SEPARATOR = "fields_separator";
/** The Constant DEFAULT_FIELD_SEPARATOR. */
public static final String DEFAULT_FIELD_SEPARATOR =" ";
/** The Constant INDEXS. */
public static final String INDEXS = "indexs";
/** The Constant DEFAULT_INDEXS. */
public static final String DEFAULT_INDEXS = "0";
/** The Constant INDEXS_SEPARATOR. */
public static final String INDEXS_SEPARATOR = "indexs_separator";
/** The Constant DEFAULT_INDEXS_SEPARATOR. */
public static final String DEFAULT_INDEXS_SEPARATOR = ",";
/** The Constant ENCRYPTED_FIELD_INDEX. */
public static final String ENCRYPTED_FIELD_INDEX = "encrypted_field_index";
/** The Constant DEFAUL_TENCRYPTED_FIELD_INDEX. */
public static final String DEFAULT_ENCRYPTED_FIELD_INDEX = "";
/** The Constant PROCESSTIME. */
public static final String PROCESSTIME = "processTime";
/** The Constant PROCESSTIME. */
public static final String DEFAULT_PROCESSTIME = "a";
}
/**
* 字符串md5加密
*/
public static class StringUtils {
// 全局数组
private final static String[] strDigits = { "0", "1", "2", "3", "4", "5",
"6", "7", "8", "9", "a", "b", "c", "d", "e", "f" };
// 返回形式为数字跟字符串
private static String byteToArrayString(byte bByte) {
int iRet = bByte;
// System.out.println("iRet="+iRet);
if (iRet < 0) {
iRet += 256;
}
int iD1 = iRet / 16;
int iD2 = iRet % 16;
return strDigits[iD1] + strDigits[iD2];
}
// 返回形式只为数字
private static String byteToNum(byte bByte) {
int iRet = bByte;
System.out.println("iRet1=" + iRet);
if (iRet < 0) {
iRet += 256;
}
return String.valueOf(iRet);
}
// 转换字节数组为16进制字串
private static String byteToString(byte[] bByte) {
StringBuffer sBuffer = new StringBuffer();
for (int i = 0; i < bByte.length; i++) {
sBuffer.append(byteToArrayString(bByte[i]));
}
return sBuffer.toString();
}
public static String GetMD5Code(String strObj) {
String resultString = null;
try {
resultString = new String(strObj);
MessageDigest md = MessageDigest.getInstance("MD5");
// md.digest() 该函数返回值为存放哈希值结果的byte数组
resultString = byteToString(md.digest(strObj.getBytes()));
} catch (NoSuchAlgorithmException ex) {
ex.printStackTrace();
}
return resultString;
}
}
}
(4)打jar包,上传jar包到flume的lib目录
cd /export/servers/flume
将jar包复制到桌面,使用xftp进行上传
(5)设置自定义flume的配置文件
创建spool-interceptor-hdfs.conf的配置文件写入以下内容
a1.channels = c1
a1.sources = r1
a1.sinks = s1
#channel
a1.channels.c1.type = memory
a1.channels.c1.capacity=1000
a1.channels.c1.transactionCapacity=200
#source
a1.sources.r1.channels = c1
a1.sources.r1.type = spooldir
a1.sources.r1.spoolDir = /root/logs4/
a1.sources.r1.batchSize= 50
a1.sources.r1.inputCharset = UTF-8
a1.sources.r1.interceptors =i1 i2
a1.sources.r1.interceptors.i1.type =cn.itcast.interceptor.CustomParameterInterceptor$Builder
a1.sources.r1.interceptors.i1.fields_separator=\\u0009
a1.sources.r1.interceptors.i1.indexs =0,1,3,5,6
a1.sources.r1.interceptors.i1.indexs_separator =\\u002c
a1.sources.r1.interceptors.i1.encrypted_field_index =0
a1.sources.r1.interceptors.i2.type = org.apache.flume.interceptor.TimestampInterceptor$Builder
#sink
a1.sinks.s1.channel = c1
a1.sinks.s1.type = hdfs
a1.sinks.s1.hdfs.path =flume/intercept/%Y%m%d
a1.sinks.s1.hdfs.filePrefix = itcasr
a1.sinks.s1.hdfs.fileSuffix = .dat
a1.sinks.s1.hdfs.rollSize = 10485760
a1.sinks.s1.hdfs.rollInterval =20
a1.sinks.s1.hdfs.rollCount = 0
a1.sinks.s1.hdfs.batchSize = 2
a1.sinks.s1.hdfs.round = true
a1.sinks.s1.hdfs.roundUnit = minute
a1.sinks.s1.hdfs.threadsPoolSize = 25
a1.sinks.s1.hdfs.useLocalTimeStamp = true
a1.sinks.s1.hdfs.minBlockReplicas = 1
a1.sinks.s1.hdfs.fileType =DataStream
a1.sinks.s1.hdfs.writeFormat = Text
a1.sinks.s1.hdfs.callTimeout = 60000
a1.sinks.s1.hdfs.idleTimeout =60
创建logs4目录,给其中放置数据文件
mkdir logs4
(6)启动fluem开始监听
bin/flume-ng agent -c conf -f conf/spool-interceptor-hdfs.conf -n a1 -Dflume.root.logger=INFO,console
Flume高阶自定义组件
在这里插入代码片
更多推荐
所有评论(0)