To connect R to Hive, you need the RJDBC and rJava packages. The code snippet
below retrieves data from the mytbl table, which is in the mydb database.
# Connect R to Hive over JDBC (RJDBC + rJava) and read a table into R.
#
# JVM options have to be in place before the JVM is initialised; setting them
# after a Java-backed package has started the JVM has no effect. Set the heap
# size first, then load the packages.
options(java.parameters = "-Xmx8000m")

library(RJDBC)
library(rJava)

# Check library paths; amd64/server (jvm) should be first.
.libPaths()

# Start the JVM explicitly (it may already have been started by a package
# load above, in which case this call is harmless).
.jinit()

# Add every file found in the Hive JDBC driver directory to the classpath.
# The directory is defined once so the driver jar path below stays in sync.
jar_dir <- "/opt/hivejdbc"
jar_files <- list.files(jar_dir, full.names = TRUE)
if (length(jar_files) > 0) {
  .jaddClassPath(jar_files)
}

# Check the resulting classpath.
.jclassPath()

# Load the Hive driver. Hive quotes identifiers with backticks.
drv <- JDBC(
  "com.cloudera.hive.jdbc4.HS2Driver",
  file.path(jar_dir, "HiveJDBC4.jar"),
  identifier.quote = "`"
)

# NOTE(review): "URL", "username" and "password" are placeholders; prefer
# reading real credentials from the environment (e.g. Sys.getenv()) rather
# than hard-coding them in the script.
conn <- dbConnect(drv, "jdbc:hive2://URL", "username", "password")

# If you want to get the list of databases.
show_databases <- dbGetQuery(conn, "show databases")

# Get data from mytbl, which is under the mydb database.
my_table <- dbGetQuery(conn, "select * from mydb.mytbl")

# Release the connection once the data has been fetched.
dbDisconnect(conn)