-- Copyright 2022 Colorado Mountain College
--
-- Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"),
-- to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
-- and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
--
-- The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
--
-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-- WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--
-- Canvas Database Generation Script
--
-- The database generation script creates the Canvas Data database based on the downloaded JSON schema file. It consists of several steps.
-- 1. Create the database on the server. This is a pretty basic setup. We set the recovery model to SIMPLE as we do not care about the transaction log, and we set the isolation level
--    to READ UNCOMMITTED as this is a read-only database. It improves performance slightly as we don't need to worry about lock overhead.
-- 2. Create the JSON tables that will hold the parsed information from the JSON files. There are three tables.
--    a. JSON_SCHEMA_VERSION: holds the JSON schema version. (Currently not used)
--    b. JSON_TABLES: holds the table information
--    c. JSON_COLUMNS: holds the column information
-- 3. Import the JSON file.
--    a. Extract the schema version
--    b. Extract the table information and write it to JSON_TABLES
--       i. For each table
--          1. Extract the column information and write it to JSON_COLUMNS
-- 4. Using the generated data, we can now create the Canvas Data warehouse.
--    a. Step through the JSON_TABLES data set and create the tables
--    b. Step through the JSON_COLUMNS data set and create the columns
-- 5. Create the primary keys on all columns where the name is 'id'
--
-- Notes: The code uses some JSON wizardry from SQL and thus may not run on older versions. The code was created on Microsoft SQL Server 2017 and should be compatible with all newer versions.
-- The database name is hard coded in the code. If you are not happy with the name, search for CanvasAnalytics2 and replace it with the name of your choosing. There is also a hard coded file path
-- to the JSON file that needs to be changed. It's noted with "IMPORTANT".
--
-- As a nice side note: since we extract the table and column information into the JSON tables, we also get a data dictionary (see the example query below).
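--
-- Example (illustrative): once the script has run, a query along these lines lists every table and column
-- together with its description, which serves as a basic data dictionary. The join on TableName reflects
-- how the two JSON tables are populated below; adjust as needed.
--   SELECT t.TableName, t.TableDesc, c.ColumnName, c.ColumnType, c.ColumnLength, c.ColumnDesc
--   FROM JSON_TABLES t
--   JOIN JSON_COLUMNS c ON c.TableName = t.TableName
--   ORDER BY t.TableName, c.ColumnName;
--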
--=============================================================================
-- 1. Create the database on the database server
--=============================================================================
USE tempdb

IF EXISTS( SELECT NAME FROM sys.databases a WHERE a.name = 'CanvasAnalytics2')
BEGIN
    ALTER DATABASE CanvasAnalytics2 SET SINGLE_USER WITH ROLLBACK IMMEDIATE;
    USE [tempdb];
    DROP DATABASE CanvasAnalytics2
END

IF NOT EXISTS( SELECT NAME FROM sys.databases a WHERE a.name = 'CanvasAnalytics2')
BEGIN
    USE master
    CREATE DATABASE CanvasAnalytics2
    ALTER DATABASE CanvasAnalytics2 SET RECOVERY SIMPLE;
    ALTER DATABASE CanvasAnalytics2 SET PAGE_VERIFY CHECKSUM;
END
GO

USE CanvasAnalytics2
SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED
GO

--=============================================================================
-- 2. Create the schema tables for the schema version, tables and columns.
--    This is where we load the information from the JSON file, which is then used to
--    actually create all the tables.
--=============================================================================
USE CanvasAnalytics2

IF EXISTS( SELECT name FROM sys.tables WHERE name = 'JSON_COLUMNS')
BEGIN
    DROP TABLE [dbo].[JSON_COLUMNS]
END

IF EXISTS( SELECT name FROM sys.tables WHERE name = 'JSON_SCHEMA_VERSION')
BEGIN
    DROP TABLE [dbo].[JSON_SCHEMA_VERSION]
END

IF EXISTS( SELECT name FROM sys.tables WHERE name = 'JSON_TABLES')
BEGIN
    DROP TABLE [dbo].[JSON_TABLES]
END

CREATE TABLE JSON_SCHEMA_VERSION
(
    JSONSchemaVersion varchar(100)
);

CREATE TABLE JSON_TABLES
(
    TableName varchar(100),
    TableDesc varchar(4000),
    DWType varchar(100)
);

CREATE TABLE JSON_COLUMNS
(
    TableName varchar(100),
    ColumnName varchar(100),
    ColumnType varchar(50),
    ColumnLength varchar(50),
    ColumnDesc varchar(4000),
    FKName varchar(100),
    FKId varchar(100),
    FKRole varchar(100)
);
GO

--=============================================================================
-- 3. Parse the JSON schema and populate the JSON_TABLES table data.
--    NOTE: The path needs to be changed to where the JSON file is located. This is the
--    JSON file that was downloaded as part of fetching the data from Canvas.
--=============================================================================
TRUNCATE TABLE [dbo].[JSON_COLUMNS]
TRUNCATE TABLE [dbo].[JSON_TABLES]
TRUNCATE TABLE [dbo].[JSON_SCHEMA_VERSION]

-- Read in the JSON schema from file.
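-- The schema file has roughly the shape sketched below. This is an illustration inferred from the
-- parsing logic that follows, not a copy of the Canvas file; the table name "example_table" and the
-- literal values are made up.
--   {
--     "version": "x.y.z",
--     "schema": {
--       "example_table": {
--         "dw_type": "dimension",
--         "tableName": "example_table",
--         "description": "...",
--         "columns": [
--           { "name": "id", "type": "bigint", "length": null, "description": "...",
--             "dimension": { "name": "...", "id": "...", "role": "..." } }
--         ]
--       }
--     }
--   }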
--************************ IMPORTANT: change the path to the correct location ************************
DECLARE @JSON VARCHAR(MAX)
SELECT @JSON = BulkColumn
FROM OPENROWSET (BULK '\\cs-powerbi1\imports\CanvasData\CanvasData\schema.json', SINGLE_CLOB) AS j
-- debug: SELECT * FROM OPENJSON (@JSON, '$.schema')

DECLARE @schemaVersion varchar(100)
DECLARE @tables AS TABLE ([t_name] varchar(500))
DECLARE @tableSchemaObject varchar(1000)
DECLARE @tableName varchar(1000)
DECLARE @tableDesc varchar(4000)
DECLARE @dwType varchar(100)
DECLARE @columnSchemaObject varchar(1000)
DECLARE @columns AS TABLE
(
    columnTableName varchar(100),
    columnName varchar(100),
    columnType varchar(50),
    columnLength varchar(50),
    columnDesc varchar(4000),
    fkName varchar(100),
    fkId varchar(100),
    fkRole varchar(100)
)

-- Extract the schema version
IF(ISJSON(@JSON) = 1)
BEGIN
    SELECT @schemaVersion = version
    FROM OPENJSON (@JSON, '$') WITH (version varchar(100) '$.version')
END
-- debug (currently not used): SELECT @schemaVersion

INSERT INTO [dbo].[JSON_SCHEMA_VERSION] ([JSONSchemaVersion])
VALUES (@schemaVersion)

-- Extract the table information at the first level
IF(ISJSON(@JSON) = 1)
BEGIN
    INSERT INTO @tables
    SELECT [key] FROM OPENJSON (@JSON, '$.schema')
END

-- Step through each table record and process the table information.
-- Write the table information to the JSON_TABLES table.
DECLARE tables_cursor CURSOR FOR
    SELECT [t_name] FROM @tables

OPEN tables_cursor
FETCH NEXT FROM tables_cursor INTO @tableName

WHILE @@FETCH_STATUS = 0
BEGIN
    SET @tableSchemaObject = '$.schema.' + @tableName

    SELECT @tableName = tableName
         , @tableDesc = description
         , @dwType = dw_type
    FROM OPENJSON (@JSON, @tableSchemaObject)
    WITH( dw_type varchar(50), tableName varchar(500), description varchar(4000))
    -- debug: SELECT @tableName, @tableDesc, @tableSchemaObject

    INSERT INTO JSON_TABLES (TableName, TableDesc, DWType)
    VALUES(@tableName, @tableDesc, @dwType)

    -- For the current table, parse the column information and write it to the JSON_COLUMNS table
    SET @columnSchemaObject = @tableSchemaObject + '.columns'
    -- debug: SELECT @columnSchemaObject
    -- debug: SELECT @tableName

    -- clear the var
    DELETE FROM @columns

    INSERT INTO @columns (columnTableName, columnName, columnType, columnLength, columnDesc, fkName, fkId, fkRole)
    SELECT @tableName, name, type, length, description, fkName, fkId, fkRole
    FROM OPENJSON (@JSON, @columnSchemaObject)
    WITH (name varchar(100) '$.name',
          type varchar(50) '$.type',
          length varchar(50) '$.length',
          description varchar(4000) '$.description',
          fkName varchar(400) '$.dimension.name',
          fkId varchar(100) '$.dimension.id',
          fkRole varchar(100) '$.dimension.role')
    -- debug: SELECT * FROM @columns

    INSERT INTO JSON_COLUMNS (TableName, ColumnName, ColumnType, ColumnLength, ColumnDesc, FKName, FKId, FKRole)
    SELECT columnTableName, columnName, columnType, columnLength, columnDesc, fkName, fkId, fkRole
    FROM @columns

    -- Get the next table to process
    FETCH NEXT FROM tables_cursor INTO @tableName
END

CLOSE tables_cursor
DEALLOCATE tables_cursor
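
-- Optional sanity check (illustrative): at this point the JSON_* tables are fully populated, so a quick
-- look at the row counts and a sample of the parsed columns confirms the schema file was read correctly.
--   SELECT COUNT(*) AS TableCount FROM JSON_TABLES;
--   SELECT COUNT(*) AS ColumnCount FROM JSON_COLUMNS;
--   SELECT TOP (20) * FROM JSON_COLUMNS ORDER BY TableName, ColumnName;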
--=============================================================================
-- 4. We now have the necessary information to create the rest of the database from the JSON tables, so
--    generate the tables and primary keys.
--
-- Notes:
-- 1. During the table generation we create a temporary column, since we must have
--    a column to create the table. We drop the column later so as to maintain the column
--    ordinal positions. They must match what is in the JSON schema, which is VERY important since the
--    data load process relies on the column order. (A quick check is sketched after the drop-column loop below.)
-- 2. The JSON data types have to be translated into column data types that make sense on the database
--    server. The translation occurs in the CASE statement. It's a best guess and seems to work for now.
--    It can always be changed.
--=============================================================================
DECLARE @currentTableName varchar(100)
DECLARE @columnName varchar(1000)
DECLARE @columnType varchar(1000)
DECLARE @columnLength int
DECLARE @sqlTableName varchar(1000)
DECLARE @execCreateTableStatement nvarchar(max)
DECLARE @execCreateColumnStatement nvarchar(max)
DECLARE @dropColumnHolder nvarchar(1000)

-- Create the tables
DECLARE tables_cursor CURSOR FOR
    SELECT TableName FROM JSON_TABLES

OPEN tables_cursor
FETCH NEXT FROM tables_cursor INTO @currentTableName

WHILE @@FETCH_STATUS = 0
BEGIN
    SELECT @execCreateTableStatement = 'CREATE TABLE [CanvasAnalytics2].[dbo].' + @currentTableName + ' (columnHolder int)'
    -- debug: PRINT @execCreateTableStatement
    EXECUTE sp_executesql @execCreateTableStatement

    FETCH NEXT FROM tables_cursor INTO @currentTableName
END

CLOSE tables_cursor
DEALLOCATE tables_cursor

-- Create the columns
DECLARE columns_cursor CURSOR FOR
    SELECT TableName, ColumnName, ColumnType, ColumnLength FROM JSON_COLUMNS

OPEN columns_cursor
FETCH NEXT FROM columns_cursor INTO @currentTableName, @columnName, @columnType, @columnLength

WHILE @@FETCH_STATUS = 0
BEGIN
    SELECT @execCreateColumnStatement = 'ALTER TABLE ' + '[CanvasAnalytics2].[dbo].' + @currentTableName + ' ADD [' + @columnName + '] ' +
        CASE
            WHEN @columnType = 'text' THEN 'varchar(max)'
            WHEN @columnType = 'varchar' THEN @columnType + '(' + CAST(@columnLength AS VARCHAR(100)) + ')'
            WHEN @columnType = 'enum' THEN 'varchar(500)'
            WHEN @columnType = 'boolean' THEN 'varchar(5)'
            WHEN @columnType = 'double precision' THEN 'float'
            WHEN @columnType = 'timestamp' THEN 'datetime2'
            WHEN @columnType = 'integer' THEN 'int'
            WHEN @columnType = 'int' THEN 'int'
            WHEN @columnType = 'bigint' THEN 'bigint'
            WHEN @columnType = 'date' THEN 'date'
            WHEN @columnType = 'guid' THEN 'varchar(500)'
            WHEN @columnType = 'datetime' THEN 'datetime2'
            ELSE '***ERROR****************************************'
        END
    -- debug: PRINT @execCreateColumnStatement
    EXECUTE sp_executesql @execCreateColumnStatement

    FETCH NEXT FROM columns_cursor INTO @currentTableName, @columnName, @columnType, @columnLength
END

CLOSE columns_cursor
DEALLOCATE columns_cursor

-- Drop the placeholder columns. When we remove them, all the other columns
-- move up the stack, aligning them with the JSON schema column order.
DECLARE tables_cursor CURSOR FOR
    SELECT TableName FROM JSON_TABLES

OPEN tables_cursor
FETCH NEXT FROM tables_cursor INTO @currentTableName

WHILE @@FETCH_STATUS = 0
BEGIN
    SELECT @dropColumnHolder = 'ALTER TABLE [CanvasAnalytics2].[dbo].' + @currentTableName + ' DROP COLUMN columnHolder '
    PRINT @dropColumnHolder
    EXECUTE sp_executesql @dropColumnHolder

    FETCH NEXT FROM tables_cursor INTO @currentTableName
END

CLOSE tables_cursor
DEALLOCATE tables_cursor
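
-- Quick ordinal-position check (illustrative): the column order of a generated table can be compared
-- against the JSON schema order. 'some_table' below is a placeholder for any table name from JSON_TABLES.
--   SELECT c.name, c.column_id
--   FROM sys.columns c
--   WHERE c.object_id = OBJECT_ID('dbo.some_table')
--   ORDER BY c.column_id;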
--=============================================================================
-- 5. Create the primary keys.
--    The schema does not tell us which columns are the primary keys, but
--    the Canvas devs were really nice and named them all "id", so we just need to look
--    for column names that are exactly 'id'.
--=============================================================================
DECLARE @pkTableName varchar(100)
DECLARE @pkColumnName varchar(100)
DECLARE @execCreatePKStatement nvarchar(max)
DECLARE @alterColumnStatement nvarchar(max)

DECLARE pk_cursor CURSOR FOR
    SELECT TableName, ColumnName FROM JSON_COLUMNS WHERE ColumnName = 'id' -- assumes that the surrogate key is always called 'id'

OPEN pk_cursor
FETCH NEXT FROM pk_cursor INTO @pkTableName, @pkColumnName

WHILE @@FETCH_STATUS = 0
BEGIN
    SELECT @alterColumnStatement = 'ALTER TABLE [dbo].[' + @pkTableName + '] ALTER COLUMN [' + @pkColumnName + '] BIGINT NOT NULL'
    -- debug: PRINT @alterColumnStatement
    EXECUTE sp_executesql @alterColumnStatement

    SELECT @execCreatePKStatement = 'ALTER TABLE [dbo].[' + @pkTableName + '] ADD CONSTRAINT [PK_' + @pkTableName + '_' + @pkColumnName + '] PRIMARY KEY CLUSTERED ( ' + @pkColumnName + ' ASC )'
    -- debug: PRINT @execCreatePKStatement
    EXECUTE sp_executesql @execCreatePKStatement

    FETCH NEXT FROM pk_cursor INTO @pkTableName, @pkColumnName
END

CLOSE pk_cursor
DEALLOCATE pk_cursor
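
-- Optional verification (illustrative): list the primary keys that were just created.
--   SELECT OBJECT_NAME(parent_object_id) AS TableName, name AS PKName
--   FROM sys.key_constraints
--   WHERE [type] = 'PK'
--   ORDER BY TableName;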