1.解压安装:
[root@zjw opt]# tar -zxf sqoop-1.4.6-cdh5.14.2.tar.gz
[root@zjw opt]# mv sqoop-1.4.6-cdh5.14.2 soft/sqoop146
2.添加所需驱动
连接Hadoop的三个驱动分别在这三个位置:
连接 MySQL 的驱动可在自己 IDEA 的 Maven 本地仓库中找到。
添加驱动:
3. 配置文件
首先准备好 Hadoop 和 Hive 的环境变量:
[root@zjw conf]# echo $HADOOP_HOME
/opt/soft/hadoop/hadoop-2.6.0-cdh5.14.2
[root@zjw conf]# echo $HIVE_HOME\
>
/opt/soft/hive/hive-1.1.0-cdh5.14.2
export HADOOP_COMMON_HOME=/opt/soft/hadoop/hadoop-2.6.0-cdh5.14.2
复制临时配置文件:
[root@zjw conf]# cp sqoop-env-template.sh sqoop-env.sh
在里面添加环境变量:
#Set path to where hadoop-*-core.jar is available
export HADOOP_MAPRED_HOME=/opt/soft/hadoop/hadoop-2.6.0-cdh5.14.2
#set the path to where bin/hbase is available
#export HBASE_HOME=
#Set the path to where bin/hive is available
export HIVE_HOME=/opt/soft/hive/hive-1.1.0-cdh5.14.2
#Set the path for where zookeper config dir is
export ZOOCFGDIR=/opt/soft/zookeeper/zookeeper-3.4.5-cdh5.14.2/conf
4.环境变量配置
# SQOOP_HOME
export SQOOP_HOME=/opt/soft/sqoop146
export PATH=$PATH:$SQOOP_HOME/bin
然后激活环境变量:
[root@zjw sqoop146]# vi /etc/profile
[root@zjw sqoop146]# source /etc/profile
启动hadoop,然后准备利用sqoop导出:
此时可能会出现缺少jar包的错误:
下载地址:java-json.jar
再次执行:
查看其中的一个分片内容:
[root@zjw conf]# hdfs dfs -text /tmp/user/part-m-00000
20/06/26 11:01:55 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
1,zs,male
查看查询的内容:
[root@zjw conf]# hdfs dfs -text /tmp/user1/part-m-00000
20/06/26 11:33:59 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
1,zs,male
然后追加6月26日的数据:
[root@zjw myshl]# sqoop import --username root --password 1234 --connect jdbc:mysql://192.168.56.100:3306/mydemo --driver com.mysql.jdbc.Driver --query "select * from myorder where orderdate='2020-06-26' and \$CONDITIONS" --incremental append --check-column orderdate --target-dir /mydata/mytest -m 1
创建一个外部表接数据:
hive> create database myddd;
OK
Time taken: 0.8 seconds
hive> use myddd;
hive> create external table mytab(
    > custid string,
    > custname string,
    > birthday string
    > )
    > row format delimited fields terminated by ','
    > location '/mydata/mytest';
OK
Time taken: 0.277 seconds
hive> select * from mytab;
OK
1 dd0001 2020-06-25
2 dd0002 2020-06-25
3 dd0003 2020-06-26
4 dd0004 2020-06-26
3 dd0003 2020-06-26
4 dd0004 2020-06-26
Time taken: 0.388 seconds, Fetched: 6 row(s)
每次运行导入后,这边的外部表就会接取到新的数据。