搭建sqoop集群

  • 2020 年 3 月 12 日
  • 笔记

下载sqoop

  • wget https://mirrors.tuna.tsinghua.edu.cn/apache/sqoop/1.4.7/sqoop-1.4.7.bin__hadoop-2.6.0.tar.gz
  • 下载地址

https://mirrors.tuna.tsinghua.edu.cn/apache/sqoop/1.4.7/

上传sqoop到主节点并解压

cd /opt
rz # 选择sqoop-1.4.7.bin__hadoop-2.6.0.tar.gz并上传
# 没有rz命令使用 yum install lrzsz安装
# 解压sqoop
tar -zxvf sqoop-1.4.7.bin__hadoop-2.6.0.tar.gz
# 重命名
mv sqoop-1.4.7.bin__hadoop-2.6.0 sqoop-1.4.7

配置sqoop环境变量

# 修改配置文件
vi /etc/profile
# 添加如下内容
export SQOOP_HOME=/opt/sqoop-1.4.7
export PATH=$PATH:$SQOOP_HOME/bin
# 保存后退出
# 使配置文件生效
source /etc/profile

修改sqoop配置文件

  • 修改配置文件sqoop-env.sh
  cd /opt/sqoop-1.4.7/conf/
  cp sqoop-env-template.sh sqoop-env.sh
  vi sqoop-env.sh
  # 然后修改配置如下后保存
  #Set path to where bin/hadoop is available
  export HADOOP_COMMON_HOME=/opt/hadoop-2.7.7

  #Set path to where hadoop-*-core.jar is available
  export HADOOP_MAPRED_HOME=/opt/hadoop-2.7.7

  #set the path to where bin/hbase is available
  export HBASE_HOME=/opt/hbase-1.2.6

  #Set the path to where bin/hive is available
  export HIVE_HOME=/opt/hive-2.3.4

  #Set the path for where zookeper config dir is
  #export ZOOCFGDIR=
  export HCAT_HOME=/opt/sqoop-1.4.7/tmp/HCatalog
  export ACCUMULO_HOME=/opt/sqoop-1.4.7/tmp/accumulo
  # 创建必备的文件夹
  cd /opt/sqoop-1.4.7/
  mkdir tmp
  cd tmp
  mkdir HCatalog accumulo
  • 上传JDBC的jar包
cd /opt/sqoop-1.4.7/lib/
# hive已上传过,可以直接复制,或者使用rz本地上传即可
cp /opt/hive-2.3.4/lib/mysql-connector-java-5.1.41-bin.jar ./
[root@hservice lib]# ll
总用量 17764
-rw-rw-r-- 1 1000 1000  224277 12月 19 2017 ant-contrib-1.0b3.jar
-rw-rw-r-- 1 1000 1000   36455 12月 19 2017 ant-eclipse-1.0-jvm1.2.jar
-rw-rw-r-- 1 1000 1000 1344870 12月 19 2017 avro-1.8.1.jar
-rw-rw-r-- 1 1000 1000  186260 12月 19 2017 avro-mapred-1.8.1-hadoop2.jar
-rw-rw-r-- 1 1000 1000   58160 12月 19 2017 commons-codec-1.4.jar
-rw-rw-r-- 1 1000 1000  365552 12月 19 2017 commons-compress-1.8.1.jar
-rw-rw-r-- 1 1000 1000  109043 12月 19 2017 commons-io-1.4.jar
-rw-rw-r-- 1 1000 1000  267634 12月 19 2017 commons-jexl-2.1.1.jar
-rw-rw-r-- 1 1000 1000  434678 12月 19 2017 commons-lang3-3.4.jar
-rw-rw-r-- 1 1000 1000   60686 12月 19 2017 commons-logging-1.1.1.jar
-rw-rw-r-- 1 1000 1000  706710 12月 19 2017 hsqldb-1.8.0.10.jar
-rw-rw-r-- 1 1000 1000   36519 12月 19 2017 jackson-annotations-2.3.1.jar
-rw-rw-r-- 1 1000 1000  197986 12月 19 2017 jackson-core-2.3.1.jar
-rw-rw-r-- 1 1000 1000  232248 12月 19 2017 jackson-core-asl-1.9.13.jar
-rw-rw-r-- 1 1000 1000  914311 12月 19 2017 jackson-databind-2.3.1.jar
-rw-rw-r-- 1 1000 1000  780664 12月 19 2017 jackson-mapper-asl-1.9.13.jar
-rw-rw-r-- 1 1000 1000 2178774 12月 19 2017 kite-data-core-1.1.0.jar
-rw-rw-r-- 1 1000 1000 1801469 12月 19 2017 kite-data-hive-1.1.0.jar
-rw-rw-r-- 1 1000 1000 1768012 12月 19 2017 kite-data-mapreduce-1.1.0.jar
-rw-rw-r-- 1 1000 1000 1765905 12月 19 2017 kite-hadoop-compatibility-1.1.0.jar
-rw-r--r-- 1 root root  992808 9月  30 09:40 mysql-connector-java-5.1.41-bin.jar # 已上传
-rw-rw-r-- 1 1000 1000   19827 12月 19 2017 opencsv-2.3.jar
-rw-rw-r-- 1 1000 1000   34604 12月 19 2017 paranamer-2.7.jar
-rw-rw-r-- 1 1000 1000   53464 12月 19 2017 parquet-avro-1.6.0.jar
-rw-rw-r-- 1 1000 1000  892808 12月 19 2017 parquet-column-1.6.0.jar
-rw-rw-r-- 1 1000 1000   20998 12月 19 2017 parquet-common-1.6.0.jar
-rw-rw-r-- 1 1000 1000  279012 12月 19 2017 parquet-encoding-1.6.0.jar
-rw-rw-r-- 1 1000 1000  375618 12月 19 2017 parquet-format-2.2.0-rc1.jar
-rw-rw-r-- 1 1000 1000   20744 12月 19 2017 parquet-generator-1.6.0.jar
-rw-rw-r-- 1 1000 1000  205389 12月 19 2017 parquet-hadoop-1.6.0.jar
-rw-rw-r-- 1 1000 1000 1033299 12月 19 2017 parquet-jackson-1.6.0.jar
-rw-rw-r-- 1 1000 1000   25496 12月 19 2017 slf4j-api-1.6.1.jar
-rw-rw-r-- 1 1000 1000  592319 12月 19 2017 snappy-java-1.1.1.6.jar
-rw-rw-r-- 1 1000 1000   99555 12月 19 2017 xz-1.5.jar

测试sqoop

# 启动hadoop
start-all.sh
# 测试sqoop
[root@hservice tmp]# sqoop list-tables  --username hive  --password hive --connect jdbc:mysql://192.168.129.200:3306/hive?useSSL=false
19/09/30 10:08:09 INFO sqoop.Sqoop: Running Sqoop version: 1.4.7
19/09/30 10:08:09 WARN tool.BaseSqoopTool: Setting your password on the command-line is insecure. Consider using -P instead.
19/09/30 10:08:10 INFO manager.MySQLManager: Preparing to use a MySQL streaming resultset.
AUX_TABLE
BUCKETING_COLS
CDS
COLUMNS_V2
COMPACTION_QUEUE
COMPLETED_COMPACTIONS
COMPLETED_TXN_COMPONENTS
DATABASE_PARAMS
DBS
DB_PRIVS
DELEGATION_TOKENS
FUNCS
FUNC_RU
GLOBAL_PRIVS
HIVE_LOCKS
IDXS
INDEX_PARAMS
KEY_CONSTRAINTS
MASTER_KEYS
NEXT_COMPACTION_QUEUE_ID
NEXT_LOCK_ID
NEXT_TXN_ID
NOTIFICATION_LOG
NOTIFICATION_SEQUENCE
NUCLEUS_TABLES
PARTITIONS
PARTITION_EVENTS
PARTITION_KEYS
PARTITION_KEY_VALS
PARTITION_PARAMS
PART_COL_PRIVS
PART_COL_STATS
PART_PRIVS
ROLES
ROLE_MAP
SDS
SD_PARAMS
SEQUENCE_TABLE
SERDES
SERDE_PARAMS
SKEWED_COL_NAMES
SKEWED_COL_VALUE_LOC_MAP
SKEWED_STRING_LIST
SKEWED_STRING_LIST_VALUES
SKEWED_VALUES
SORT_COLS
TABLE_PARAMS
TAB_COL_STATS
TBLS
TBL_COL_PRIVS
TBL_PRIVS
TXNS
TXN_COMPONENTS
TYPES
TYPE_FIELDS
VERSION
WRITE_SET

配置子节点的sqoop

  • 使用scp将sqoop远程复制到子节点
scp -r /opt/sqoop-1.4.7/ root@node1:/opt/ # 远程传输到node1节点
scp -r /opt/sqoop-1.4.7/ root@node2:/opt/ # 远程传输到node2节点
  • 配置环境变量
# 修改配置文件
vi /etc/profile
# 添加如下内容
export SQOOP_HOME=/opt/sqoop-1.4.7
export PATH=$PATH:$SQOOP_HOME/bin
# 保存后退出
# 使配置文件生效
source /etc/profile
  • 子节点测试sqoop
[root@node2 ~]# sqoop list-tables  --username hive  --password hive --connect jdbc:mysql://192.168.129.200:3306/hive?useSSL=false
19/09/30 10:23:06 INFO sqoop.Sqoop: Running Sqoop version: 1.4.7
19/09/30 10:23:07 WARN tool.BaseSqoopTool: Setting your password on the command-line is insecure. Consider using -P instead.
19/09/30 10:23:07 INFO manager.MySQLManager: Preparing to use a MySQL streaming resultset.
AUX_TABLE
BUCKETING_COLS
CDS
COLUMNS_V2
COMPACTION_QUEUE
COMPLETED_COMPACTIONS
COMPLETED_TXN_COMPONENTS
DATABASE_PARAMS
DBS
DB_PRIVS
DELEGATION_TOKENS
FUNCS
FUNC_RU
GLOBAL_PRIVS
HIVE_LOCKS
IDXS
INDEX_PARAMS
KEY_CONSTRAINTS
MASTER_KEYS
NEXT_COMPACTION_QUEUE_ID
NEXT_LOCK_ID
NEXT_TXN_ID
NOTIFICATION_LOG
NOTIFICATION_SEQUENCE
NUCLEUS_TABLES
PARTITIONS
PARTITION_EVENTS
PARTITION_KEYS
PARTITION_KEY_VALS
PARTITION_PARAMS
PART_COL_PRIVS
PART_COL_STATS
PART_PRIVS
ROLES
ROLE_MAP
SDS
SD_PARAMS
SEQUENCE_TABLE
SERDES
SERDE_PARAMS
SKEWED_COL_NAMES
SKEWED_COL_VALUE_LOC_MAP
SKEWED_STRING_LIST
SKEWED_STRING_LIST_VALUES
SKEWED_VALUES
SORT_COLS
TABLE_PARAMS
TAB_COL_STATS
TBLS
TBL_COL_PRIVS
TBL_PRIVS
TXNS
TXN_COMPONENTS
TYPES
TYPE_FIELDS
VERSION
WRITE_SET