文章目录
基本操作建表描述表,查看schema从文件导入数据hive 中执行 shell 命令hive 中执行 hdfs 命令拷贝表
内部表和外部表内部表外部表
分区表建表描述分区表载入数据到指定分区查询数据查看 hdfs分区表的查询模式: strict/nostrict查看有哪些分区增加分区修改表名移动分区的存储位置复制表
基本操作
建表
show databases;
show tables;
create database mydb
;
create table if not exists
mydb
.employee
(
eid
int,
name String
,
salary String
,
destination String
)
COMMENT 'Employee details'
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
LINES TERMINATED BY '\n' STORED
AS TEXTFILE
;
描述表,查看schema
DESCRIBE mydb
.employee
;
从文件导入数据
load data local inpath
'/root/text.csv' into table employee
;
hive 中执行 shell 命令
!clear; //hive中执行shell命令, 清空屏幕
hive 中执行 hdfs 命令
dfs -ls -R /; //hive中执行hdfs命令
拷贝表
create table mydb
.employee2
as select * from myhive
.employee
;
内部表和外部表
内部表
托管表(内部表),hive默认创建的表都是托管表,hive控制其数据的生命周期。删除托管表时,元数据和数据均被删除。
create table if not exists
mydb
.employee_inner
(
eid
int,
name String
,
salary String
,
destination String
)
COMMENT 'Employee details'
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
LINES TERMINATED BY '\n'
STORED
AS TEXTFILE
;
drop table mydb
.employee_inner
;
外部表
hive 控制元数据,删除外部表时,数据不被删除
create external
table if not exists
myhive
.employee_outer
(
eid
int,
name String
,
salary String
,
destination String
)
COMMENT 'Employee details'
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
LINES TERMINATED BY '\n'
STORED
AS TEXTFILE
;
分区表
建表
partitioned by
create table if not exists
test2
(
eid
int,
name String
,
salary String
,
destination String
)
partitioned
by(country string
,state string
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
LINES TERMINATED BY '\n'
STORED
AS TEXTFILE
;
描述分区表
载入数据到指定分区
load data local inpath
'/root/text.csv'
into table mydb
.test2
partition(country
='china',state
='shanxi');
查询数据
select * from mydb
.test2
where country
='china' and state
='shanxi';
查看 hdfs
分区表的查询模式: strict/nostrict
默认是非严格,严格模式要求查询的时候必须带分区
set hive
.mapred
.mode=strict
查看有哪些分区
show partitions mydb
.test2
;
show partitions mydb
.test2
partition(state
='shanxi');
增加分区
注意不能增加不存在的分区列
alter table mydb
.test2
add partition(country
='china',state
='henan');
alter table mydb
.test2
add
partition(country
='china',state
='hubei')
partition(country
='china',state
='hebei')
partition(country
='china',state
='shandong');
修改表名
alter table mydb
.test2
rename to mydb
.test
;
移动分区的存储位置
alter table mydb
.test
partition(country
='china',state
='hubei')
set location
'/user/hive/warehouse/myhive.db/test/country=china/state=hubei1';
验证一下:
load data local inpath
'/root/text.csv'
into table mydb
.test
partition(country
='china',state
='shanxi');
dfs
-ls
/user/hive
/warehouse
/mydb
.db
/test
/country
=china
/state
=hubei
;
dfs
-ls
/user/hive
/warehouse
/myhive
.db
/test
/country
=china
;
复制表
create table if not exists
mydb
.test3
(
eid
int,
name String
,
salary String
,
destination String
)
partitioned
by(country string
,state string
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
LINES TERMINATED BY '\n'
STORED
AS TEXTFILE
;
insert into mydb
.test3
partition(country
='china',state
='hubei')
select eid
,name
,salary
,destination
from mydb
.test
where country
='china' and state
='henan';