sprint-econtai/create_onet_database.sh
2025-04-26 23:38:19 +02:00

85 lines
2.4 KiB
Bash
Executable file

#!/usr/bin/env bash
# Configuration shared by every step of this script.
#   ONET_ZIP_URL     - location of the O*NET MySQL-format SQL archive
#   ONET_ZIP_FILE    - local archive name (last path component of the URL)
#   ONET_EXTRACT_DIR - directory searched for *.sql files (archive name
#                      without its .zip suffix)
#   ONET_DB_NAME     - SQLite database file that gets (re)built
ONET_ZIP_URL="https://www.onetcenter.org/dl_files/database/db_29_1_mysql.zip"
ONET_ZIP_FILE="${ONET_ZIP_URL##*/}"
ONET_EXTRACT_DIR="${ONET_ZIP_FILE%.zip}"
ONET_DB_NAME="onet.database"
# Download the O*NET archive unless a previous run already fetched it.
#
# Reads: ONET_ZIP_URL (source), ONET_ZIP_FILE (destination path).
# Exits: 1 if neither curl nor wget can fetch the archive, or if the
#        finished download cannot be moved into place.
if [ ! -f "$ONET_ZIP_FILE" ]; then
  echo "Downloading O*NET database from $ONET_ZIP_URL"
  # Download under a temporary name and rename only on success, so that an
  # interrupted or failed transfer is never mistaken for a complete archive
  # by the existence check above on the next run.
  onet_zip_partial="${ONET_ZIP_FILE}.part"
  # -f makes curl return non-zero on HTTP errors (e.g. 404) instead of saving
  # the server's error page as the archive; wget is the fallback if curl is
  # missing or fails.
  if ! { curl -fL -o "$onet_zip_partial" "$ONET_ZIP_URL" \
    || wget -O "$onet_zip_partial" "$ONET_ZIP_URL"; }; then
    rm -f -- "$onet_zip_partial"
    echo "Failed to download O*NET database" >&2
    exit 1
  fi
  if ! mv -- "$onet_zip_partial" "$ONET_ZIP_FILE"; then
    rm -f -- "$onet_zip_partial"
    echo "Failed to download O*NET database" >&2
    exit 1
  fi
else
  echo "Using existing O*NET database zip file"
fi
# Extract the downloaded archive unless the extraction directory already
# exists.
#
# Reads: ONET_ZIP_FILE (archive), ONET_EXTRACT_DIR (directory the archive is
#        expected to create in the current working directory).
# Exits: 1 if unzip fails or the expected directory is missing afterwards.
if [ ! -d "$ONET_EXTRACT_DIR" ]; then
  echo "Extracting O*NET database files"
  if ! unzip -o "$ONET_ZIP_FILE"; then
    echo "Failed to extract O*NET database files" >&2
    # Drop any partially extracted tree; otherwise the directory check above
    # would treat it as a complete extraction on the next run. The :? guard
    # aborts instead of running rm -rf with an empty path.
    rm -rf -- "${ONET_EXTRACT_DIR:?}"
    exit 1
  fi
  # unzip can succeed without producing the directory the later steps search
  # for SQL files; catch that here rather than building an empty database.
  if [ ! -d "$ONET_EXTRACT_DIR" ]; then
    echo "Archive $ONET_ZIP_FILE did not contain expected directory: $ONET_EXTRACT_DIR" >&2
    exit 1
  fi
else
  echo "Using existing extracted O*NET database files"
fi
# Remove any database left over from a previous run so the import starts
# from an empty file.
#
# SQLite keeps write-ahead-log / rollback-journal data in sidecar files named
# after the database (-wal, -shm, -journal). This script leaves the database
# in WAL mode, so stale sidecars may exist; they are removed together with the
# main file so they can never be paired with the freshly built database.
# Exits: 1 if the old files cannot be removed.
if [ -f "$ONET_DB_NAME" ]; then
  echo "Removing existing database"
fi
if ! rm -f -- "$ONET_DB_NAME" "${ONET_DB_NAME}-wal" "${ONET_DB_NAME}-shm" "${ONET_DB_NAME}-journal"; then
  echo "Failed to remove existing database" >&2
  exit 1
fi
# Build the SQLite database from the extracted SQL files, then switch it to
# settings suitable for normal use.
#
# Reads: ONET_DB_NAME (database file to create), ONET_EXTRACT_DIR (tree
#        searched for *.sql files).
# Exits: 1 if sqlite3 is not installed, no SQL files are found, or the import
#        fails. A failure in the post-import tuning step only prints a warning.
if ! command -v sqlite3 > /dev/null 2>&1; then
  echo "sqlite3 command not found" >&2
  exit 1
fi

# Collect the SQL files NUL-delimited so paths containing whitespace survive,
# sorted so they are executed in alphabetical order.
sql_files=()
while IFS= read -r -d '' sql_file; do
  sql_files+=("$sql_file")
done < <(find "$ONET_EXTRACT_DIR" -name "*.sql" -print0 | sort -z)

# Without this check an empty file list would yield an empty database that is
# reported as a successful import.
if [ "${#sql_files[@]}" -eq 0 ]; then
  echo "No SQL files found in $ONET_EXTRACT_DIR" >&2
  exit 1
fi

echo "Creating new SQLite database: $ONET_DB_NAME with performance settings"
echo "Executing SQL files in alphabetical order (single transaction mode)"

# Make the pipeline below fail if the SQL producer fails (e.g. an unreadable
# file), not only when sqlite3 itself reports an error.
set -o pipefail

# The fast-import PRAGMAs are per-connection settings, so they must be issued
# in the same sqlite3 session that runs the import; setting them in a separate
# invocation has no effect on the import. PRAGMA journal_mode has to come
# before BEGIN because it cannot be changed inside a transaction.
# -bail stops at the first SQL error instead of continuing and committing a
# partially populated database. COMMIT is only sent if every file was read.
if ! {
  cat << 'EOF'
PRAGMA journal_mode = OFF;
PRAGMA synchronous = 0;
PRAGMA cache_size = 1000000;
PRAGMA locking_mode = EXCLUSIVE;
PRAGMA temp_store = MEMORY;
PRAGMA foreign_keys = ON;
BEGIN TRANSACTION;
EOF
  cat -- "${sql_files[@]}" && printf '\nCOMMIT;\n'
} | sqlite3 -bail "$ONET_DB_NAME"; then
  echo "Error executing SQL files in batch transaction" >&2
  # With journaling off a failed import cannot be rolled back, so the file on
  # disk is not trustworthy; remove it rather than leave it behind.
  rm -f -- "$ONET_DB_NAME"
  exit 1
fi

echo "Database populated successfully. Restoring reliability settings..."
# Of the settings below only journal_mode = WAL is stored in the database
# file; the other PRAGMAs apply to this session (and thus to the VACUUM run
# here) and must be set again by any application that needs them.
if sqlite3 "$ONET_DB_NAME" << 'EOF'
PRAGMA journal_mode = WAL;
PRAGMA synchronous = NORMAL;
PRAGMA locking_mode = NORMAL;
PRAGMA temp_store = DEFAULT;
PRAGMA foreign_keys = ON;
PRAGMA optimize;
VACUUM;
EOF
then
  echo "Reliability settings restored successfully"
  echo "O*NET database created and optimized successfully!"
else
  # Best effort: the data is already imported, so this is not fatal. The
  # "optimized successfully" message is deliberately not printed here.
  echo "Warning: Failed to restore reliability settings, but database is populated" >&2
fi