##### # # Name: Canvas.API.SelectedTableDownload # Author: Joseph Gullo, California Northstate University # # Description: This script selectively downloads tables from Canvas using # the canvas data cli then prepares those downloads for import into # Microsoft SQL Server # # Usage: # This script, when run as a scheduled task, will create an access token # using the Canvas Data v2 API, and use that to pull a list of selected # tables into tab separated value (tsv) files which will later be # imported by a SQL server agent job. This script takes a list of # tables, downloads the latest schema files for the tables and # stashes them for reference, then creates a job to request the # download of the indicated table. The script waits for the job to # complete, then queries the result status to get the URL of the file # to download, then downloads it. It also uses 7-zip to extract the # compressed tsv file, then uses some find/replace statements to clean # up some special characters for import. # ##### # Paths for later file downloading $DownloadPath = 'F:\Canvas\dataFiles\' $UnpackedPath = 'F:\Canvas\unpackedFiles\' $SchemaPath = 'F:\Canvas\schemaFiles\' # Pull Canvas Data v2 API credentials from system environment variables $CLIENT_ID=$Env:CanvasV2ClientID $SECRET=$Env:CanvasV2Secret # Get a datestamp in a variable for later use $datestamp = get-date -format "yyyy.MM.dd" # Create a request for an access token for this data API session $Header = @{"Authorization" = "Basic "+[System.Convert]::ToBase64String([System.Text.Encoding]::UTF8.GetBytes("${CLIENT_ID}:${SECRET}"))} $body = @{ "grant_type" = "client_credentials" } $ACCESS_TOKEN = (Invoke-RestMethod -Method POST -Header $Header -ContentType "application/x-www-form-urlencoded" -Body $body -uri "https://api-gateway.instructure.com/ids/auth/login").access_token # Select the list of tables to process and download $TableList = @( #, "access_tokens" "account_users" , "accounts" #, "assessment_question_banks" #, "assessment_questions" #, "assignment_groups" #, "assignment_override_students" #, "assignment_overrides" #, "assignments" #, "attachment_associations" #, "attachments" #, "calendar_events" #, "canvadocs_annotation_contexts" #, "comment_bank_items" #, "communication_channels" #, "content_migrations" #, "content_participation_counts" #, "content_participations" #, "content_shares" #, "content_tags" #, "context_external_tools" #, "context_module_progressions" #, "context_modules" #, "conversation_message_participants" #, "conversation_messages" #, "conversation_participants" #, "conversations" #, "course_account_associations" #, "course_sections" , "courses" #, "custom_gradebook_column_data" #, "custom_gradebook_columns" #, "developer_key_account_bindings" #, "developer_keys" #, "discussion_entries" #, "discussion_entry_participants" #, "discussion_topic_participants" #, "discussion_topics" #, "enrollment_dates_overrides" , "enrollment_states" , "enrollment_terms" , "enrollments" #, "favorites" #, "folders" #, "grading_period_groups" #, "grading_periods" #, "grading_standards" #, "group_categories" #, "group_memberships" #, "groups" #, "late_policies" #, "learning_outcome_groups" #, "learning_outcome_question_results" #, "learning_outcome_results" #, "learning_outcomes" #, "lti_line_items" #, "lti_resource_links" #, "lti_results" #, "master_courses_child_content_tags" #, "master_courses_child_subscriptions" #, "master_courses_master_content_tags" #, "master_courses_master_migrations" #, "master_courses_master_templates" #, "master_courses_migration_results" #, "originality_reports" #, "outcome_proficiencies" #, "outcome_proficiency_ratings" #, "post_policies" , "pseudonyms" #, "quiz_groups" #, "quiz_questions" #, "quiz_submissions" #, "quizzes" #, "role_overrides" , "roles" #, "rubric_assessments" #, "rubric_associations" #, "rubrics" #, "score_statistics" , "scores" #, "submission_comments" #, "submission_versions" #, "submissions" #, "user_account_associations" #, "user_notes" , "users" #, "web_conference_participants" #, "web_conferences" #, "wiki_pages" #, "wikis" ) #$TableList = ( "scores" ) # Iterate through the list of tables we want to download forEach ($Table in $TableList) { # Download the schema definition for the current table $SchemaFile = $SchemaPath + "$Table.schema.json" $ReqHeader = @{"x-instauth" = "${ACCESS_TOKEN}"} $ReqContentType = "application/json" $SchemaReqURL = "https://api-gateway.instructure.com/dap/query/canvas/table/" + $Table + "/schema" try { Invoke-WebRequest -Header $ReqHeader -Method GET -ContentType "application/json" $SchemaReqURL -OutFile $SchemaFile } catch { $_.Exception.Message #exit } # Initiate a job to start downloading a full dump of the selected table $ReqHeader = @{"x-instauth" = "${ACCESS_TOKEN}"} $ReqContentType = "application/json" $ReqBody = @{ "format" = "tsv" "mode" = "expanded" } $ReqBodyJson = $ReqBody | convertto-json $RequestURL = "https://api-gateway.instructure.com/dap/query/canvas/table/" + $Table + "/data" try { $RequestID = (Invoke-RestMethod -Header $ReqHeader -Method POST -ContentType "application/json" -Body $ReqBodyJson $RequestURL) } catch { $_.Exception.Message #exit } # Begin tracking the status of the job requesting the selected table download $JobStatusURL = "https://api-gateway.instructure.com/dap/job/" + $RequestID.id try { $RequestStatus = (Invoke-RestMethod -Header $ReqHeader -Method GET -ContentType "application/json" $JobStatusURL) } catch { $_.Exception.Message #exit } $RequestStatus # Debug Output # Every 20 seconds, check that the job is complete. If it's complete, move forward while ( $RequestStatus.status -ne 'complete' ) { try { $RequestStatus = (Invoke-RestMethod -Header $ReqHeader -Method GET -ContentType "application/json" $JobStatusURL) } catch { $_.Exception.Message #exit } $RequestStatus.status # Debug Output sleep 20 } # Once the job is complete, initiate a process to collect the URL of the object, download it to a file, # uncompress the file, then clean it up for import $ObjReqBody = convertto-json $RequestStatus.objects $ObjReqBody # Debug Output $ObjUrlID = ($RequestStatus.objects).id try { $URLs = Invoke-RestMethod -Header $ReqHeader -Method POST -ContentType "application/json" -Body $ObjReqBody https://api-gateway.instructure.com/dap/object/url } catch { $_.Exception.Message #exit } $URLs # Debug Output foreach( $url in $URLs.urls ) { $urlCounter = 0 foreach ( $ObjUrl in $ObjUrlID ) { $ObjUrl # Debug Output $url.$ObjUrl # Debug Output $url.$ObjUrl.url # Debug Output $DownloadFilePath = $DownloadPath + $Table + "." + $urlCounter + ".tsv.gz" try { Invoke-WebRequest -Header $ReqHeader -Method GET $url.$ObjUrl.url -OutFile $DownloadFilePath } catch { $_.Exception.Message #exit } & "C:\Program Files\7-Zip\7z.exe" x $DownloadFilePath -aoa -o"$UnpackedPath" #rm $DownloadFilePath $UnpackedFilePath = $UnpackedPath + $Table + "." + $urlCounter + ".tsv" # Process the uncompressed files, converting "\N" to blank representing a NULL for import into Microsoft SQL ((Get-Content -path $UnpackedFilePath -Raw) -replace '\\N','') | Set-Content -Path $UnpackedFilePath # Convert line endings and unneeded blank lines for import into Microsoft SQL (Get-Content -path $UnpackedFilePath) | ? {$_.trim() -ne "" } | Set-Content -Path $UnpackedFilePath # Add a tab to the end of each line for import into Microsoft SQL #(Get-Content -path $UnpackedFilePath) | foreach {$_ + "`t" } | Set-Content -Path $UnpackedFilePath $urlCounter =+ 1 } } }