1、查看数据库的描述及路径
hive> describe database database_name;2、建表 创建脚本 /opt/createtable.sql
create table IF NOT EXISTS lining_test.emp( empno int, ename string, job string, mgr int, hiredate string, sal double, comm double, deptno int ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';执行脚本
hive>source /opt/createtable.sql;加载数据
load data local inpath '/opt/test/emp.txt' overwrite into table emp;查询
use lining_test;select * from emp;增加列
use lining_test;alter table emp add columns(year string);3、分区表 创建脚本/opt/createparttable.sql
create table lining_test.emp_part( empno int, empname string, job string, mgr int, hiredate string, salary double, comm double, deptno int) partitioned by (year string, month string) row format delimited fields terminated by '\t';执行脚本
hive>source /opt/createparttable.sql;加载数据
load data local inpath '/opt/emp2.txt' into table lining_test.emp_part partition(year='2018',month='8'); load data local inpath '/opt/emp3.txt' into table lining_test.emp_part partition(year='2018',month='3'); load data local inpath '/opt/emp4.txt' into table lining_test.emp_part partition(year='2017',month='12');查询
use lining_test;select * from emp_part;查询表结构
describe lining_test.emp_part;4、直接用hive命令
hive -e "use lining_test;ALTER TABLE emp CHANGE year month string;" hive -f /opt/query.sql(-e执行引号内的SQL语句,-f执行文件中的SQL,例如文件内容为:select * from db_hive01.emp)5、导出结果
insert overwrite local directory '/opt/test/' row format delimited fields terminated by '\t' select * from emp_part; hive -e 'select * from emp_part' >> ./emp_export.txt6、导出到HDFS
insert overwrite directory '/export_data' select * from lining_test.emp;有local的速度明显比没有local慢
7、显示所有函数:
hive>show functions;查看函数的用法;
hive>describe function fun_name;常用函数
select upper(empname) from emp; select unix_timestamp(trackTime) from bflog limit 3 ; select year(hiredate) from emp ; select month(hiredate) from emp ; select hour(hiredate) from emp ; select substr(hiredate,1,4) from emp ; select split(hiredate,'-')[1] from emp ; select reverse(hiredate) from emp ; select concat(empno,'-',empname) from emp ; case when 条件1 then ... when 条件2 then ... else ... end自定义UDF
add jar /opt/test/mylower.jar ; CREATE TEMPORARY FUNCTION mylower AS 'org.gh.hadoop.hive.MyLower';8、内连接: hive> SELECT sales.*, things.* FROM sales JOIN things ON (sales.id = things.id);
9、查看hive为某个查询使用多少个MapReduce作业 hive> EXPLAIN SELECT sales.*, things.* FROM sales JOIN things ON (sales.id = things.id);
10、左连接: hive> SELECT sales.*, things.* FROM sales LEFT OUTER JOIN things ON (sales.id = things.id);
11、Map连接:Hive可以把较小的表放入每个Mapper的内存来执行连接操作 hive> SELECT /*+ MAPJOIN(things) */ sales.*, things.* FROM sales JOIN things ON (sales.id = things.id);
12、insert 导入
INSERT OVERWRITE TABLE stations_by_year SELECT year, COUNT(DISTINCT station) FROM records2 GROUP BY year;13、创建视图:
CREATE VIEW valid_records AS SELECT * FROM records2 WHERE temperature !=9999;14、hive中使用正则表达式 选出所有列名以price作为前缀的列 (1) hive>select `price.*` from product;(正则列选择需用反引号,单引号只会返回字符串常量) (2) 用Like或者RLike
15、order by 全局排序
insert overwrite local directory '/opt/test/local' row format delimited fields terminated by '\t' select * from emp order by empno;16、sort by 与 distribute by 类似MR中partition,进行分区,结合sort by使用每个reduce内部进行排序,全局不是排序, distribute by 一定是放在sort by 前面,且必须要指定mapreduce.job.reduces数量,否则导出结果还是在一个文件中
set mapreduce.job.reduces=3; insert overwrite local directory '/opt/test/local' row format delimited fields terminated by '\t' select * from emp distribute by deptno sort by empno;当distribute by和sort by 字段一样的时候,直接使用cluster by
17、聚合函数 可以通过设置属性hive.map.aggr值为true来提高聚合的性能: hive>set hive.map.aggr=true;
18、word count
select word,count(*) from( select explode(split(sentence,' ')) as word from article )t group by word;