Hive创建表及插入数据demo-成都创新互联网站建设

关于创新互联

多方位宣传企业产品与服务 突出企业形象

公司简介 公司的服务 荣誉资质 新闻动态 联系我们

Hive创建表及插入数据demo

create table student(id int comment "学生id",name string comment "学生姓名",age int comment "学生年龄")
comment "学生信息表"
row format delimited fields terminated by ",";

创新互联专业为企业提供松原网站建设、松原做网站、松原网站设计、松原网站制作等企业网站建设、网页设计与制作、松原企业网站模板建站服务,十余年松原做网站经验,不只是建网站,更提供有价值的思路和整体网络服务。

create external table student_ext(id int comment "学生id",name string comment "学生姓名",age int comment "学生年龄")
comment "学生信息表"
row format delimited fields terminated by ","
location "/user/hive/student_ext";

create external table student_ptn(id int comment "学生id",name string comment "学生姓名",age int comment "学生年龄")
comment "学生信息表"
partitioned by (city string)
row format delimited fields terminated by ","
location "/user/hive/student_ptn";

set hive.exec.dynamici.partition=true; #开启动态分区,默认是false
set hive.exec.dynamic.partition.mode=nonstrict; #开启允许所有分区都是动态的,否则必须要有静态分区才能使用。

set hive.exec.dynamic.partition=true;(可通过这个语句查看:set hive.exec.dynamic.partition;)
set hive.exec.dynamic.partition.mode=nonstrict;
SET hive.exec.max.dynamic.partitions=100000;(如果自动分区数大于这个参数,将会报错)
SET hive.exec.max.dynamic.partitions.pernode=100000;

insert into table student_ptn partition(city) select 6,"yangdong",29,"beijing";
insert into table student_ptn partition(city) select 2,"limei",22,"chongqing";
insert into table student_ptn partition(city) select 3,"wangxing",25,"beijing";
insert into table student_ptn partition(city) select 4,"chenming",22,"beijing";
insert into table student_ptn partition(city) select 5,"xiali",26,"chongqing";

create external table student_bck(id int comment "学生id",name string comment "学生姓名",age int comment "学生年龄")
comment "学生信息表"
clustered by(id) sorted by(id asc) into 2 buckets
row format delimited fields terminated by ","
location "/user/hive/student_bck";

insert into table student_bck
select * from student;

create table cdt(
id int,
name string,
work_location array,
piaofang map,
address struct)
row format delimited
fields terminated by "\t"
collection items terminated by ","
map keys terminated by ":"
lines terminated by "\n";

将json字符串加载到table json中
{"movie":"1193","rate":"5","timeStamp":"978300760","uid":"1"}
{"movie":"661","rate":"3","timeStamp":"978302109","uid":"1"}
{"movie":"914","rate":"3","timeStamp":"978301968","uid":"1"}
{"movie":"3408","rate":"4","timeStamp":"978300275","uid":"1"}
{"movie":"2355","rate":"5","timeStamp":"978824291","uid":"1"}
{"movie":"1197","rate":"3","timeStamp":"978302268","uid":"1"}
{"movie":"1287","rate":"5","timeStamp":"978302039","uid":"1"}
{"movie":"2804","rate":"5","timeStamp":"978300719","uid":"1"}
{"movie":"594","rate":"4","timeStamp":"978302268","uid":"1"}

CREATE TABLE json(
data string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
'hdfs://hmaster:9000/user/hive/warehouse/plusorg.db/json'
TBLPROPERTIES (
'transient_lastDdlTime'='1542008332')

load data local inpath "/root/json.txt" into table json;
get_json_object(data,'$.movie') 内置函数解析某列数据
select get_json_object(data,'$.movie') as movie from json;
json_tuple(jsonStr, k1, k2, ...) 内置函数解析json字符串类数据
参数为一组键k1,k2……和JSON字符串,返回值的元组。该方法比 get_json_object 高效,因为可以在一次调用中输入多个键

select
b.b_movie,
b.b_rate,
b.b_timeStamp,
b.b_uid
from json a
lateral view json_tuple(a.data,'movie','rate','timeStamp','uid') b as b_movie,b_rate,b_timeStamp,b_uid;

create table rate(movie int, rate int, unixtime int, userid int) row format delimited fields
terminated by '\t';

insert into table rate select
get_json_object(data,'$.movie') as moive,
get_json_object(data,'$.rate') as rate,
get_json_object(data,'$.timeStamp') as unixtime,
get_json_object(data,'$.uid') as userid
from json;

select from_unixtime(unixtime,'yyyy/MM/dd HH:mm:ss') from rate;

create table lastjsontable(movie int, rate int, utime date, userid int) row format delimited
fields terminated by '\t';
添加Python脚本,hive即可访问,路径为在当前Unix服务器存储绝对路径
add file /home/pythoncode/WeekdayMapper.py;

insert into table lastjsontable
select
transform(movie,rate,unixtime,userid) #输入值(基表)
using 'python WeekdayMapper.py' #使用脚本清洗
as(movie,rate,utime,userid) #输出值(子表)
from rate; #基表


分享标题:Hive创建表及插入数据demo
文章URL:http://kswsj.cn/article/ipespp.html

其他资讯