学习笔记(2):大数据之Hive-基本查询

    技术2025-11-04  27

    立即学习: https://edu.csdn.net/course/play/8005/164134?utm_source=blogtoedu

    文章目录

    导出数据到目录查询语句select函数运算聚合函数去重首字符的 ascii 码进制编码类型转换字符串拼接分页wherecase when then数据范围group by 分组查询 如何避免 map reduce 操作

    导出数据到目录

    create table test3 as select id,name from test2 where province='hebei' and city='baoding'; --导出hive数据到本地目录(下载) insert overwrite local directory '/home/hadoop/hive' select * from hive1.test1 where province='hebei'; --导出hive数据到hdfs目录 insert overwrite directory '/user/hadoop/data' select * from hive1.test1 where province='hebei'; --查询数据向多个目录同时输送 from hive1.test2 t insert overwrite directory '/home/hadoop/hive/hebei' select * where t.province='hebei' insert overwrite directory '/home/hadoop/hive/henan' select * where t.province='henan';

    查询语句

    select

    select col1,col2 from table t;

    函数运算

    select upper(name) from test2; // name大写 select lower(name) from test2; // name小写 select age + 10 from test2; select round(12.34); //12 四舍五入 select floor(12.34); //12 地板 select ceil(12.34); //13 天花板 select rand(10); //随机数

    聚合函数

    select count(*) from test2; select sum(age) from test2; select avg(age) from test2; select max(age) from test2; select min(age) from test2;

    去重

    select count(distinct name) from test2;

    首字符的 ascii 码

    select ascii("abc"); select ascii('abc');

    进制编码

    select base64(binary('http://localhost:8080/helloworld')); select binary('http://localhost:8080/helloworld');

    类型转换

    select cast('120' as bigint) + 200; //320 select '120' + 200; //320.0

    字符串拼接

    select concat('ab','cd'); //abcd

    分页

    select * from test2 limit 1,2; //从第二条开始,查出来两条

    where

    from test2 e select e.id,e.name,e.age where e.city='baoding'; select e.id,e.name,e.age from test2 e where e.city='baoding';

    case when then

    select name,age, case when age<20 then 'low' when age>50 then 'old' else 'big' end as agestatus from hive1.test2;

    注意!!! hive 中的 where 语句不能使用别名

    --select id, name n ,age from test2 where n like 't%'; //wrong where中不能使用字段别名

    数据范围

    select * from test2 where age between 12 and 24; select * from test2 where age <=24 and age >12;

    group by 分组查询

    select count(*),province from test2 group by province; select count(*) as c, province from test2 group by province having c>3;

    如何避免 map reduce 操作

    不使用mr作业的模式就是本地作业,下面方法可以尽量避免mr作业。

    全表扫描,没有where子句 select * from test; where子句作用只有分区字段,也不需要mr(limit也不需要) select * from test2 where province='hebei'; 设置该属性后,hive会尽量使用local模式查询 set hive.exec.mode.local.auto=true;
    Processed: 0.015, SQL: 9