此问题是此问题How to quickly export data from R to SQL Server的扩展。目前我使用的代码如下:
# DB Handle for config file #
dbhandle <- odbcDriverConnect()
# save the data in the table finally
sqlSave(dbhandle, bp, "FACT_OP", append=TRUE, rownames=FALSE, verbose = verbose, fast = TRUE)
# varTypes <- c(Date="datetime", QueryDate = "datetime")
# sqlSave(dbhandle, bp, "FACT_OP", rownames=FALSE,verbose = TRUE, fast = TRUE, varTypes=varTypes)
# DB handle close
odbcClose(dbhandle)
我也尝试过这种方法,它工作得很好,而且速度也很快。
toSQL = data.frame(...);
write.table(toSQL,"C:\\export\\filename.txt",quote=FALSE,sep=",",row.names=FALSE,col.names=FALSE,append=FALSE);
sqlQuery(channel,"BULK
INSERT Yada.dbo.yada
FROM '\\\\<server-that-SQL-server-can-see>\\export\\filename.txt'
WITH
(
FIELDTERMINATOR = ',',
ROWTERMINATOR = '\\n'
)");
但我的问题是,我不能在事务之间保持我的数据静止(由于数据安全,将数据写入文件不是一个选项),所以我正在寻找解决方案,如果我可以直接从内存批量插入或缓存数据。谢谢你的帮助。
发布于 2018-02-08 05:39:08
问得好-在由于某种原因无法设置BULK INSERT
权限的情况下也很有用。
不久前,当我有足够的数据表明sqlSave
太慢时,我拼凑了这个可怜人的解决方案,但还不足以证明设置BULK INSERT
是合理的,所以它不需要向文件中写入任何数据。sqlSave
和参数化查询在插入数据方面速度如此之慢的主要原因是,每个行都是使用新的INSERT
语句插入的。在下面的示例中,让R手动编写INSERT
语句可以绕过这一点:
library(RODBC)
channel <- ...
dataTable <- ...relevant data...
numberOfThousands <- floor(nrow(dataTable)/1000)
extra <- nrow(dataTable)%%1000
thousandInsertQuery <- function(channel,dat,range){
sqlQuery(channel,paste0("INSERT INTO Database.dbo.Responses (IDNum,State,Answer)
VALUES "
,paste0(
sapply(range,function(k) {
paste0("(",dat$IDNum[k],",'",
dat$State[k],"','",
gsub("'","''",dat$Answer[k],fixed=TRUE),"')")
})
,collapse=",")))
}
if(numberOfThousands)
for(n in 1:numberOfThousands)
{
thousandInsertQuery(channel,(1000*(n-1)+1):(1000*n),dataTable)
}
if(extra)
thousandInsertQuery(channel,(1000*numberOfThousands+1):(1000*numberOfThousands+extra))
使用值写出的SQL的INSERT
语句一次最多只能接受1000行,因此此代码将其分解为块(比一次一行效率高得多)。
显然,必须对thousandInsertQuery
函数进行自定义,以处理数据框中的任何列-还要注意,字符/因子列周围有单引号,还有一个gsub
来处理字符列中可能存在的任何单引号。除此之外,没有针对SQL注入攻击的保护措施。
发布于 2017-06-14 09:43:21
使用DBI::dbWriteTable()
函数怎么样?下面的例子(我将我的R代码连接到MS SQL Express
的AWS RDS
实例):
library(DBI)
library(RJDBC)
library(tidyverse)
# Specify where you driver lives
drv <- JDBC(
"com.microsoft.sqlserver.jdbc.SQLServerDriver",
"c:/R/SQL/sqljdbc42.jar")
# Connect to AWS RDS instance
conn <- drv %>%
dbConnect(
host = "jdbc:sqlserver://xxx.ccgqenhjdi18.ap-southeast-2.rds.amazonaws.com",
user = "xxx",
password = "********",
port = 1433,
dbname= "qlik")
if(0) { # check what the conn object has access to
queryResults <- conn %>%
dbGetQuery("select * from information_schema.tables")
}
# Create test data
example_data <- data.frame(animal=c("dog", "cat", "sea cucumber", "sea urchin"),
feel=c("furry", "furry", "squishy", "spiny"),
weight=c(45, 8, 1.1, 0.8))
# Works in 20ms in my case
system.time(
conn %>% dbWriteTable(
"qlik.export.test",
example_data
)
)
# Let us see if we see the exported results
conn %>% dbGetQuery("select * FROM qlik.export.test")
# Let's clean the mess and force-close connection at the end of the process
conn %>% dbDisconnect()
对于少量传输的数据,它的工作速度相当快,如果您想要data.frame
-> SQL table
解决方案,它会显得相当优雅。
享受吧!
发布于 2018-10-13 21:48:47
在@jpd527解决方案的基础上构建,我发现它确实值得深入研究……
require(RODBC)
channel <- #connection parameters
dbPath <- # path to your table, database.table
data <- # the DF you have prepared for insertion, /!\ beware of column names and values types...
# Function to insert 1000 rows of data in one sqlQuery call, coming from
# any DF and into any database.table
insert1000Rows <- function(channel, dbPath, data, range){
# Defines columns names for the database.table
columns <- paste(names(data), collapse = ", ")
# Initialize a string which will incorporate all 1000 rows of values
values <- ""
# Not very elegant, but appropriately builds the values (a, b, c...), (d, e, f...) into a string
for (i in range) {
for (j in 1:ncol(data)) {
# First column
if (j == 1) {
if (i == min(range)) {
# First row, only "("
values <- paste0(values, "(")
} else {
# Next rows, ",("
values <- paste0(values, ",(")
}
}
# Value Handling
values <- paste0(
values
# Handling NA values you want to insert as NULL values
, ifelse(is.na(data[i, j])
, "null"
# Handling numeric values you want to insert as INT
, ifelse(is.numeric(data[i, j])
, data[i, J]
# Else handling as character to insert as VARCHAR
, paste0("'", data[i, j], "'")
)
)
)
# Separator for columns
if (j == ncol(data)) {
# Last column, close parenthesis
values <- paste0(values, ")")
} else {
# Other columns, add comma
values <- paste0(values, ",")
}
}
}
# Once the string is built, insert it into SQL Server
sqlQuery(channel,paste0("insert into ", dbPath, " (", columns, ") values ", values))
}
此insert1000Rows
函数在下一个函数sqlInsertAll
的循环中使用,您只需定义要将哪个DF插入到哪个database.table中即可。
# Main function which uses the insert1000rows function in a loop
sqlInsertAll <- function(channel, dbPath, data) {
numberOfThousands <- floor(nrow(data) / 1000)
extra <- nrow(data) %% 1000
if (numberOfThousands) {
for(n in 1:numberOfThousands) {
insert1000Rows(channel, dbPath, data, (1000 * (n - 1) + 1):(1000 * n))
print(paste0(n, "/", numberOfThousands))
}
}
if (extra) {
insert1000Rows(channel, dbPath, data, (1000 * numberOfThousands + 1):(1000 * numberOfThousands + extra))
}
}
这样,我可以在5分钟左右插入25万行数据,而使用RODBC包中的sqlSave
需要24小时以上的时间。
https://stackoverflow.com/questions/37688685
复制相似问题