/*
--- Description ---
CSV text exported from Excel allows cells to contain commas and line breaks, so splitting
the CSV isn't as simple as splitting on comma and end-of-line. We need to temporarly
replace any LF (line feed) or comma characters that are in a cell so that we can correctly
split the CSV string.
--- Legend ---
CHAR(10) = LF (line feed/end of line)
CHAR(13) = CR (carraige return/end of line)
CHAR(128) = Represents line feed within quoted column
CHAR(129) = Respesents comma within quoted column
CHAR(130) = Represents quoted text within quoted column
--- Instructions ---
1. This assumes you will create this procedure exactly (so you can reuse it) and build a separate import procedure.
In your procedure, get the @csv text from the file (or whatever your source is). Ex:
DECLARE @csv NVARCHAR(MAX)
select @csv = <get your CSV string how ever you need to>
2. In your procedure, create your temporary table based on the CSV data (must match
what you expect to be returned) Ex:
CREATE TABLE #import(
...
)
3. In your procedure, call this procedure and insert the restults. Ex:
INSERT INTO #import EXEC sp_CSVtoTable @csv
4. Now you're ready to do whatever you need with the data (insert into permanent table, etc.)
*/
-- exec sp_00_CSVtoTable
CREATE PROCEDURE [dbo].[sp_00_CSVtoTable](
@csv NVARCHAR(MAX)
)
AS
BEGIN
SET NOCOUNT ON;
SET ANSI_WARNINGS OFF
/*************************************
Clean Text
**************************************/
SELECT @csv = REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(@csv
, CHAR(239), '') --Some funky character at the beginning of most CSV strings pulled from Excel
, CHAR(187), '')
, CHAR(191), '')
, CHAR(13), CHAR(10)) --Windows line ending is CR LF; remove CR and keep LF for consitency across OS
, CHAR(10) + CHAR(10), CHAR(10)) --Replace LF LF with LF
/*************************************
Replace LF and commas in-cell with
temporary characters
**************************************/
--Separate lines that are surrounded by quotes
DECLARE @csvReplaceTextInCell TABLE(
row INT IDENTITY(1,1)
, m_text NVARCHAR(MAX)
)
SELECT @csv = REPLACE(@csv, '""', CHAR(130)) --Double quotes represent an escaped quotation mark (a quotation mark used within a cell)
INSERT INTO @csvReplaceTextInCell SELECT value FROM STRING_SPLIT(@csv, '"') -- Each quoted column will be on a separate row. The rows preceding and following will be unquoted
UPDATE @csvReplaceTextInCell SET m_text = REPLACE(REPLACE(m_text
, CHAR(10), CHAR(128)) --Replace LF on quoted lines with temp character
, ',', CHAR(129)) --Replace commas on quoted lines with temp character
WHERE row % 2 = 0 --Every other row will be a quoted column (i.e. even rows will be quoted columns)
--Stuff and string_agg seem to be very unreliable. Re-concatenate string the old fashioned way
DECLARE @thisrow INT, @totalrows INT
SELECT @thisrow = 1, @totalrows = COUNT(*), @csv = ''
FROM @csvReplaceTextInCell
WHILE @thisrow <= @totalrows
BEGIN
SELECT @csv = @csv + m_text
FROM @csvReplaceTextInCell
WHERE row = @thisrow
SELECT @thisrow = @thisrow + 1
END
/*************************************
Split CSV string into rows
**************************************/
CREATE TABLE #csvRows (
attdata NVARCHAR(MAX)
)
INSERT INTO #csvRows SELECT value FROM STRING_SPLIT(@csv, CHAR(10))
DELETE FROM #csvRows WHERE attdata = '' OR attdata = ',' --remove blank row
ALTER TABLE #csvRows ADD row INT IDENTITY(1,1)
/*************************************
Create the table
**************************************/
CREATE TABLE #csvTable(
row INT
)
DECLARE @sql NVARCHAR(MAX)= 'ALTER TABLE #csvTable ADD '
SELECT @sql = @sql + 'c' + CONVERT(VARCHAR, ROW_NUMBER() OVER (ORDER BY value)) + ' NVARCHAR(MAX),'
FROM STRING_SPLIT((SELECT attdata FROM #csvRows WHERE row = 1), ',')
SELECT @sql = LEFT(@sql, LEN(@sql) - 1)
--PRINT @sql
EXEC (@sql)
ALTER TABLE #csvTable DROP COLUMN row
/*************************************
Split CSV string into columns for each row
**************************************/
SELECT @totalrows = COUNT(*), @thisrow = 1 FROM #csvRows
WHILE @thisrow <= @totalrows
BEGIN
SELECT @sql = 'INSERT INTO #csvTable SELECT '
SELECT @sql = @sql + ''''
+ REPLACE(REPLACE(REPLACE(REPLACE(value
, '''', '''''')
, CHAR(128), CHAR(10)) --Re-add the line break
, CHAR(129), ',') --Re-add commas
, CHAR(130), '"') --Re-add quotation mark
+ ''','
FROM STRING_SPLIT((SELECT attdata FROM #csvRows WHERE row = @thisrow), ',')
SELECT @sql = LEFT(@sql, LEN(@sql) - 1)
--PRINT @sql
BEGIN TRY
EXEC (@sql)
END TRY
BEGIN CATCH
PRINT 'Error on row ' + CONVERT(VARCHAR, @thisrow) + ': ' + ERROR_MESSAGE()
PRINT @SQL
SELECT * FROM #csvRows
RETURN
END CATCH
SET @thisrow = @thisrow + 1
END
/*************************************
Final output
**************************************/
SELECT * FROM #csvTable
DROP TABLE #csvTable
END