Hi @dtod, we had a similar problem with our submissions table. We used the relevant CREATE TABLE statements (which you can find here) and then ran the shell script below against the files downloaded by dap snapshot. Hope that helps!
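For context, the snapshot itself came from Instructure's DAP client CLI. As a rough sketch (the exact flags can vary by client version, so check dap snapshot --help; the --namespace, --table, and --format values here are assumptions based on the client we used):

dap snapshot --namespace canvas --table submissions --format tsv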
#!/bin/bash
# list the gz files in the job subdirectory - there should be just one - in an array;
# globbing directly avoids the pitfalls of parsing ls output
gzips=(job*/*.gz)
# loop through the archives in the array
for archive in "${gzips[@]}"
do
echo "$(date) starting on $archive"
# drop field 47, which is meta.ts - it isn't stored in the table and causes errors;
# use > rather than >> so a re-run doesn't append duplicate rows
gzip -dc "$archive" | cut -f1-46 > "$archive.txt"
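# optional sanity check (a suggestion, not part of the original script):
# print the numbered header columns to confirm meta.ts really is field 47 in your export
# gzip -dc "$archive" | head -1 | tr '\t' '\n' | cat -n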
# split the decompressed file into 500K-line parts named subm_00, subm_01, ...
split -d -l 500000 "$archive.txt" subm_
# glob the parts by their filename stem into an array
parts=(subm_*)
# loop through the parts in the array
for part in "${parts[@]}"
do
# test the filename suffix: the first part ("00") contains the header row
if [[ "${part##*_}" == "00" ]]
then psqlcommand="\copy canvas.submissions FROM $part WITH (HEADER) ;"
else psqlcommand="\copy canvas.submissions FROM $part ;"
fi
psql -c "$psqlcommand"
echo "$(date) $part done"
rm "$part"
done
rm "$archive.txt"
done
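In case it's useful, here's roughly how we invoke it. This is just a sketch: PGHOST, PGDATABASE, and PGUSER are the standard libpq environment variables that psql picks up, but the host/database/user values (and the load_submissions.sh filename) are made-up examples, so substitute your own.

export PGHOST=warehouse.example.edu PGDATABASE=canvas_data PGUSER=loader
bash load_submissions.sh
# quick sanity check once it finishes
psql -c "SELECT count(*) FROM canvas.submissions;"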