-- Copyright 2022 Colorado Mountain College
--
-- Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"),
-- to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
-- and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
--
-- The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
--
-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-- WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--
-- Canvas Database Generation Script
--
-- The database generation script creates the Canvas Data database based on the downloaded JSON schema file. It consists of several steps.
-- 1. Create the database on the server. This is a pretty basic setup. We set the recovery model to SIMPLE as we do not care about the transaction log, and we set the isolation level
--    to READ UNCOMMITTED as this is a read-only database. It improves performance slightly as we don't need to worry about lock overhead.
-- 2. Create the JSON tables that will hold the parsed information from the JSON files. There are three tables.
--    a. JSON_SCHEMA_VERSION: holds the JSON schema version. (Currently not used)
--    b. JSON_TABLES: holds the table information
--    c. JSON_COLUMNS: holds the column information
-- 3. Import the JSON file.
--    a. Extract the schema version
--    b. Extract the table information and write it to JSON_TABLES
--       i. For each table
--          1. Extract the column information and write it to JSON_COLUMNS
-- 4. Using the generated data, we can now create the Canvas Data warehouse.
--    a. Step through the JSON_TABLES data set and create the tables
--    b. Step through the JSON_COLUMNS data set and create the columns
-- 5. Create the primary keys on all columns where the name is 'id'
--
-- Notes: The code uses some JSON wizardry from SQL and thus may not run on older versions. The code was created on Microsoft SQL Server 2017 and should be compatible with all newer versions.
-- The database name is hard coded in the code. If you are not happy with the name, search for CanvasAnalytics2 and replace it with the name of your choosing. There is also a hard coded file path
-- to the JSON file that needs to be changed. It's noted with "IMPORTANT".
--
-- As a nice side note: since we extract the table and column information into the JSON tables, we also get a data dictionary (see the example query below).
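--
-- Example (illustrative): once the script has run, a query along these lines lists every table and column
-- together with its description, which serves as a basic data dictionary. The join on TableName reflects
-- how the two JSON tables are populated below; adjust as needed.
--   SELECT t.TableName, t.TableDesc, c.ColumnName, c.ColumnType, c.ColumnLength, c.ColumnDesc
--   FROM JSON_TABLES t
--   JOIN JSON_COLUMNS c ON c.TableName = t.TableName
--   ORDER BY t.TableName, c.ColumnName;
--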
--=============================================================================
-- 1. Create the database on the database server
--=============================================================================
USE tempdb

IF EXISTS( SELECT NAME FROM sys.databases a WHERE a.name = 'CanvasAnalytics2')
BEGIN
    ALTER DATABASE CanvasAnalytics2 SET SINGLE_USER WITH ROLLBACK IMMEDIATE;
    USE [tempdb];
    DROP DATABASE CanvasAnalytics2
END

IF NOT EXISTS( SELECT NAME FROM sys.databases a WHERE a.name = 'CanvasAnalytics2')
BEGIN
    USE master
    CREATE DATABASE CanvasAnalytics2
    ALTER DATABASE CanvasAnalytics2 SET RECOVERY SIMPLE;
    ALTER DATABASE CanvasAnalytics2 SET PAGE_VERIFY CHECKSUM;
END
GO

USE CanvasAnalytics2
SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED
GO

--=============================================================================
-- 2. Create the schema tables for the schema version, tables and columns.
--    This is where we load the information from the JSON file, which is then used to
--    actually create all the tables.
--=============================================================================
USE CanvasAnalytics2

IF EXISTS( SELECT name FROM sys.tables WHERE name = 'JSON_COLUMNS')
BEGIN
    DROP TABLE [dbo].[JSON_COLUMNS]
END

IF EXISTS( SELECT name FROM sys.tables WHERE name = 'JSON_SCHEMA_VERSION')
BEGIN
    DROP TABLE [dbo].[JSON_SCHEMA_VERSION]
END

IF EXISTS( SELECT name FROM sys.tables WHERE name = 'JSON_TABLES')
BEGIN
    DROP TABLE [dbo].[JSON_TABLES]
END

CREATE TABLE JSON_SCHEMA_VERSION
(
    JSONSchemaVersion varchar(100)
);

CREATE TABLE JSON_TABLES
(
    TableName varchar(100),
    TableDesc varchar(4000),
    DWType varchar(100)
);

CREATE TABLE JSON_COLUMNS
(
    TableName varchar(100),
    ColumnName varchar(100),
    ColumnType varchar(50),
    ColumnLength varchar(50),
    ColumnDesc varchar(4000),
    FKName varchar(100),
    FKId varchar(100),
    FKRole varchar(100)
);
GO

--=============================================================================
-- 3. Parse the JSON schema and populate the JSON_TABLES table data.
--    NOTE: The path needs to be changed to where the JSON file is located. This is the
--    JSON file that was downloaded as part of fetching the data from Canvas.
--=============================================================================
TRUNCATE TABLE [dbo].[JSON_COLUMNS]
TRUNCATE TABLE [dbo].[JSON_TABLES]
TRUNCATE TABLE [dbo].[JSON_SCHEMA_VERSION]

-- Read in the JSON schema from file.
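-- The schema file has roughly the shape sketched below. This is an illustration inferred from the
-- parsing logic that follows, not a copy of the Canvas file; the table name "example_table" and the
-- literal values are made up.
--   {
--     "version": "x.y.z",
--     "schema": {
--       "example_table": {
--         "dw_type": "dimension",
--         "tableName": "example_table",
--         "description": "...",
--         "columns": [
--           { "name": "id", "type": "bigint", "length": null, "description": "...",
--             "dimension": { "name": "...", "id": "...", "role": "..." } }
--         ]
--       }
--     }
--   }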
--************************ IMPORTANT: change the path to the correct location ************************
DECLARE @JSON VARCHAR(MAX)
SELECT @JSON = BulkColumn
FROM OPENROWSET (BULK '\\cs-powerbi1\imports\CanvasData\CanvasData\schema.json', SINGLE_CLOB) AS j
-- debug: SELECT * FROM OPENJSON (@JSON, '$.schema')

DECLARE @schemaVersion varchar(100)
DECLARE @tables AS TABLE ([t_name] varchar(500))
DECLARE @tableSchemaObject varchar(1000)
DECLARE @tableName varchar(1000)
DECLARE @tableDesc varchar(4000)
DECLARE @dwType varchar(100)
DECLARE @columnSchemaObject varchar(1000)
DECLARE @columns AS TABLE
(
    columnTableName varchar(100),
    columnName varchar(100),
    columnType varchar(50),
    columnLength varchar(50),
    columnDesc varchar(4000),
    fkName varchar(100),
    fkId varchar(100),
    fkRole varchar(100)
)

-- Extract the schema version
IF(ISJSON(@JSON) = 1)
BEGIN
    SELECT @schemaVersion = version
    FROM OPENJSON (@JSON, '$') WITH (version varchar(100) '$.version')
END
-- debug (currently not used): SELECT @schemaVersion

INSERT INTO [dbo].[JSON_SCHEMA_VERSION] ([JSONSchemaVersion])
VALUES (@schemaVersion)

-- Extract the table information at the first level
IF(ISJSON(@JSON) = 1)
BEGIN
    INSERT INTO @tables
    SELECT [key] FROM OPENJSON (@JSON, '$.schema')
END

-- Step through each table record and process the table information.
-- Write the table information to the JSON_TABLES table.
DECLARE tables_cursor CURSOR FOR
    SELECT [t_name] FROM @tables

OPEN tables_cursor
FETCH NEXT FROM tables_cursor INTO @tableName

WHILE @@FETCH_STATUS = 0
BEGIN
    SET @tableSchemaObject = '$.schema.' + @tableName

    SELECT @tableName = tableName
         , @tableDesc = description
         , @dwType = dw_type
    FROM OPENJSON (@JSON, @tableSchemaObject)
    WITH( dw_type varchar(50), tableName varchar(500), description varchar(4000))
    -- debug: SELECT @tableName, @tableDesc, @tableSchemaObject

    INSERT INTO JSON_TABLES (TableName, TableDesc, DWType)
    VALUES(@tableName, @tableDesc, @dwType)

    -- For the current table, parse the column information and write it to the JSON_COLUMNS table
    SET @columnSchemaObject = @tableSchemaObject + '.columns'
    -- debug: SELECT @columnSchemaObject
    -- debug: SELECT @tableName

    -- clear the var
    DELETE FROM @columns

    INSERT INTO @columns (columnTableName, columnName, columnType, columnLength, columnDesc, fkName, fkId, fkRole)
    SELECT @tableName, name, type, length, description, fkName, fkId, fkRole
    FROM OPENJSON (@JSON, @columnSchemaObject)
    WITH (name varchar(100) '$.name',
          type varchar(50) '$.type',
          length varchar(50) '$.length',
          description varchar(4000) '$.description',
          fkName varchar(400) '$.dimension.name',
          fkId varchar(100) '$.dimension.id',
          fkRole varchar(100) '$.dimension.role')
    -- debug: SELECT * FROM @columns

    INSERT INTO JSON_COLUMNS (TableName, ColumnName, ColumnType, ColumnLength, ColumnDesc, FKName, FKId, FKRole)
    SELECT columnTableName, columnName, columnType, columnLength, columnDesc, fkName, fkId, fkRole
    FROM @columns

    -- Get the next table to process
    FETCH NEXT FROM tables_cursor INTO @tableName
END

CLOSE tables_cursor
DEALLOCATE tables_cursor
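
-- Optional sanity check (illustrative): at this point the JSON_* tables are fully populated, so a quick
-- look at the row counts and a sample of the parsed columns confirms the schema file was read correctly.
--   SELECT COUNT(*) AS TableCount FROM JSON_TABLES;
--   SELECT COUNT(*) AS ColumnCount FROM JSON_COLUMNS;
--   SELECT TOP (20) * FROM JSON_COLUMNS ORDER BY TableName, ColumnName;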
--=============================================================================
-- 4. We now have the necessary information to create the rest of the database from the JSON tables, so
--    generate the tables and primary keys.
--
-- Notes:
-- 1. During the table generation we create a temporary column, since we must have
--    a column to create the table. We drop the column later so as to maintain the column
--    ordinal positions. They must match what is in the JSON schema, which is VERY important since the
--    data load process relies on the column order. (A quick check is sketched after the drop-column loop below.)
-- 2. The JSON data types have to be translated into column data types that make sense on the database
--    server. The translation occurs in the CASE statement. It's a best guess and seems to work for now.
--    It can always be changed.
--=============================================================================
DECLARE @currentTableName varchar(100)
DECLARE @columnName varchar(1000)
DECLARE @columnType varchar(1000)
DECLARE @columnLength int
DECLARE @sqlTableName varchar(1000)
DECLARE @execCreateTableStatement nvarchar(max)
DECLARE @execCreateColumnStatement nvarchar(max)
DECLARE @dropColumnHolder nvarchar(1000)

-- Create the tables
DECLARE tables_cursor CURSOR FOR
    SELECT TableName FROM JSON_TABLES

OPEN tables_cursor
FETCH NEXT FROM tables_cursor INTO @currentTableName

WHILE @@FETCH_STATUS = 0
BEGIN
    SELECT @execCreateTableStatement = 'CREATE TABLE [CanvasAnalytics2].[dbo].' + @currentTableName + ' (columnHolder int)'
    -- debug: PRINT @execCreateTableStatement
    EXECUTE sp_executesql @execCreateTableStatement

    FETCH NEXT FROM tables_cursor INTO @currentTableName
END

CLOSE tables_cursor
DEALLOCATE tables_cursor

-- Create the columns
DECLARE columns_cursor CURSOR FOR
    SELECT TableName, ColumnName, ColumnType, ColumnLength FROM JSON_COLUMNS

OPEN columns_cursor
FETCH NEXT FROM columns_cursor INTO @currentTableName, @columnName, @columnType, @columnLength

WHILE @@FETCH_STATUS = 0
BEGIN
    SELECT @execCreateColumnStatement = 'ALTER TABLE ' + '[CanvasAnalytics2].[dbo].' + @currentTableName + ' ADD [' + @columnName + '] ' +
        CASE
            WHEN @columnType = 'text' THEN 'varchar(max)'
            WHEN @columnType = 'varchar' THEN @columnType + '(' + CAST(@columnLength AS VARCHAR(100)) + ')'
            WHEN @columnType = 'enum' THEN 'varchar(500)'
            WHEN @columnType = 'boolean' THEN 'varchar(5)'
            WHEN @columnType = 'double precision' THEN 'float'
            WHEN @columnType = 'timestamp' THEN 'datetime2'
            WHEN @columnType = 'integer' THEN 'int'
            WHEN @columnType = 'int' THEN 'int'
            WHEN @columnType = 'bigint' THEN 'bigint'
            WHEN @columnType = 'date' THEN 'date'
            WHEN @columnType = 'guid' THEN 'varchar(500)'
            WHEN @columnType = 'datetime' THEN 'datetime2'
            ELSE '***ERROR****************************************'
        END
    -- debug: PRINT @execCreateColumnStatement
    EXECUTE sp_executesql @execCreateColumnStatement

    FETCH NEXT FROM columns_cursor INTO @currentTableName, @columnName, @columnType, @columnLength
END

CLOSE columns_cursor
DEALLOCATE columns_cursor

-- Drop the placeholder columns. When we remove them, all the other columns
-- move up the stack, aligning them with the JSON schema column order.
DECLARE tables_cursor CURSOR FOR
    SELECT TableName FROM JSON_TABLES

OPEN tables_cursor
FETCH NEXT FROM tables_cursor INTO @currentTableName

WHILE @@FETCH_STATUS = 0
BEGIN
    SELECT @dropColumnHolder = 'ALTER TABLE [CanvasAnalytics2].[dbo].' + @currentTableName + ' DROP COLUMN columnHolder '
    PRINT @dropColumnHolder
    EXECUTE sp_executesql @dropColumnHolder

    FETCH NEXT FROM tables_cursor INTO @currentTableName
END

CLOSE tables_cursor
DEALLOCATE tables_cursor
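
-- Quick ordinal-position check (illustrative): the column order of a generated table can be compared
-- against the JSON schema order. 'some_table' below is a placeholder for any table name from JSON_TABLES.
--   SELECT c.name, c.column_id
--   FROM sys.columns c
--   WHERE c.object_id = OBJECT_ID('dbo.some_table')
--   ORDER BY c.column_id;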
--=============================================================================
-- 5. Create the primary keys.
--    The schema does not tell us which columns are the primary keys, but
--    the Canvas devs were really nice and named them all "id", so we just need to look
--    for column names that are exactly 'id'.
--=============================================================================
DECLARE @pkTableName varchar(100)
DECLARE @pkColumnName varchar(100)
DECLARE @execCreatePKStatement nvarchar(max)
DECLARE @alterColumnStatement nvarchar(max)

DECLARE pk_cursor CURSOR FOR
    SELECT TableName, ColumnName FROM JSON_COLUMNS WHERE ColumnName = 'id' -- assumes that the surrogate key is always called 'id'

OPEN pk_cursor
FETCH NEXT FROM pk_cursor INTO @pkTableName, @pkColumnName

WHILE @@FETCH_STATUS = 0
BEGIN
    SELECT @alterColumnStatement = 'ALTER TABLE [dbo].[' + @pkTableName + '] ALTER COLUMN [' + @pkColumnName + '] BIGINT NOT NULL'
    -- debug: PRINT @alterColumnStatement
    EXECUTE sp_executesql @alterColumnStatement

    SELECT @execCreatePKStatement = 'ALTER TABLE [dbo].[' + @pkTableName + '] ADD CONSTRAINT [PK_' + @pkTableName + '_' + @pkColumnName + '] PRIMARY KEY CLUSTERED ( ' + @pkColumnName + ' ASC )'
    -- debug: PRINT @execCreatePKStatement
    EXECUTE sp_executesql @execCreatePKStatement

    FETCH NEXT FROM pk_cursor INTO @pkTableName, @pkColumnName
END

CLOSE pk_cursor
DEALLOCATE pk_cursor
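
-- Optional verification (illustrative): list the primary keys that were just created.
--   SELECT OBJECT_NAME(parent_object_id) AS TableName, name AS PKName
--   FROM sys.key_constraints
--   WHERE [type] = 'PK'
--   ORDER BY TableName;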