old
This commit is contained in:
parent
720f21a85b
commit
43076bcbb1
42 changed files with 237415 additions and 7831 deletions
22
archive/README.md
Normal file
22
archive/README.md
Normal file
|
@ -0,0 +1,22 @@
|
|||
# Presentation
|
||||
|
||||
## Notebooks
|
||||
|
||||
- [data enrichment](data_enrichment.ipynb) - contains the code to gather things from the O\*NET data, BLS's OEWS database (unused for now), Barnett's data...
|
||||
- [prompt evaluation](evaluate_llm_time_estimations.ipynb) - the playground used to evaluate change in hyperparameters (system prompt, user prompt, schema, model...)
|
||||
- [analysis](analysis.ipynb) - code to generate the graphs in the paper
|
||||
- [legacy](legacy.ipynb) - if there are some missing pieces, it's worth looking in there.
|
||||
|
||||
## Running the non-notebook code
|
||||
|
||||
To re-run everything, you need Python and uv up and running. If you have nix installed, run
|
||||
|
||||
```bash
|
||||
nix develop .#impure
|
||||
```
|
||||
|
||||
and then `uv run ...` as requested in the notebooks.
|
||||
|
||||
If something is missing, email <dorn@xfe.li>; I'm usually responsive.
|
||||
|
||||
Copy `.env.example` to `.env` and fill in OPENAI_API_KEY. The total run and experiments cost less than $10.
|
2429
archive/analysis.ipynb
Normal file
2429
archive/analysis.ipynb
Normal file
File diff suppressed because one or more lines are too long
21699
archive/bck_estimates.csv
Normal file
21699
archive/bck_estimates.csv
Normal file
File diff suppressed because it is too large
Load diff
4935
archive/data_enrichment.ipynb
Normal file
4935
archive/data_enrichment.ipynb
Normal file
File diff suppressed because one or more lines are too long
2441
archive/evaluate_llm_time_estimations.ipynb
Normal file
2441
archive/evaluate_llm_time_estimations.ipynb
Normal file
File diff suppressed because one or more lines are too long
628
archive/legacy.ipynb
Normal file
628
archive/legacy.ipynb
Normal file
|
@ -0,0 +1,628 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 86,
|
||||
"id": "beace815-b5ae-44a4-a81c-a7f82cb66296",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[2K\u001b[2mResolved \u001b[1m118 packages\u001b[0m \u001b[2min 386ms\u001b[0m\u001b[0m \u001b[0m\n",
|
||||
"\u001b[2K\u001b[2mPrepared \u001b[1m2 packages\u001b[0m \u001b[2min 124ms\u001b[0m\u001b[0m \n",
|
||||
"\u001b[2K\u001b[2mInstalled \u001b[1m2 packages\u001b[0m \u001b[2min 5ms\u001b[0m\u001b[0m \u001b[0m\n",
|
||||
" \u001b[32m+\u001b[39m \u001b[1met-xmlfile\u001b[0m\u001b[2m==2.0.0\u001b[0m\n",
|
||||
" \u001b[32m+\u001b[39m \u001b[1mopenpyxl\u001b[0m\u001b[2m==3.1.5\u001b[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!uv add pandas requests openai dotenv openpyxl"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "941d511f-ad72-4306-bbab-52127583e513",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import dotenv\n",
|
||||
"import openai\n",
|
||||
"import sqlite3\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"dotenv.load_dotenv() # Copy .env.example to .env and fill in the blanks\n",
|
||||
"\n",
|
||||
"oai_token = os.getenv(\"OPENAI_API_KEY\")\n",
|
||||
"\n",
|
||||
"oai = openai.OpenAI(api_key=oai_token)\n",
|
||||
"onet = sqlite3.connect(\"onet.database\") # Run ./create_onet_database.sh to create it\n",
|
||||
"# This dataset comes from https://epoch.ai/gradient-updates/consequences-of-automating-remote-work\n",
|
||||
"# It contains labels for whether an O*NET task can be done remotely or not (labeled by GPT-4o)\n",
|
||||
"# You can download it here: https://drive.google.com/file/d/1GrHhuYIgaCCgo99dZ_40BWraz-fzo76r/view?usp=sharing\n",
|
||||
"df_remote_status = pd.read_csv(\"epoch_task_data.csv\")\n",
|
||||
"\n",
|
||||
"# BLS OEWS: https://www.bls.gov/oes/special-requests/oesm23nat.zip\n",
|
||||
"df_oesm = pd.read_excel(\"oesm23national.xlsx\")\n",
|
||||
"\n",
|
||||
"# Run uv run enrich_task_ratings.py to get this file (trs = Task RatingS)\n",
|
||||
"df_enriched_trs = pd.read_json(\"task_ratings_enriched.json\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "a5351f8b-c2ad-4d3e-af4a-992f539a6064",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"FREQUENCY_MAP = {\n",
|
||||
" 'frequency_category_1': \"Yearly or less\",\n",
|
||||
" 'frequency_category_2': \"More than yearly\",\n",
|
||||
" 'frequency_category_3': \"More than monthly\",\n",
|
||||
" 'frequency_category_4': \"More than weekly\",\n",
|
||||
" 'frequency_category_5': \"Daily\",\n",
|
||||
" 'frequency_category_6': \"Several times daily\",\n",
|
||||
" 'frequency_category_7': \"Hourly or more\"\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "8b2ab22a-afab-41f9-81a3-48eab261b568",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"background_prompt = '''\n",
|
||||
"Estimate the typical duration to complete *one instance* of the following job task from the moment a person starts to work on it to the last moment the person will need to keep it in mind\n",
|
||||
"\n",
|
||||
"Take into account that there might be delays between the steps to complete the task, which would lengthen the estimate.\n",
|
||||
"\n",
|
||||
"Output a range with the format [duration A] - [duration B] where [duration A] and [duration B] correspond to one of the durations below:\n",
|
||||
"- less than 30 minutes\n",
|
||||
"- 30 minutes\n",
|
||||
"- 1 hour\n",
|
||||
"- 4 hours\n",
|
||||
"- 8 hours\n",
|
||||
"- 16 hours\n",
|
||||
"- 3 days\n",
|
||||
"- 1 week\n",
|
||||
"- 3 weeks\n",
|
||||
"- 6 weeks\n",
|
||||
"- 3 months\n",
|
||||
"- 6 months\n",
|
||||
"- 1 year\n",
|
||||
"- 3 years\n",
|
||||
"- more than 3 year\n",
|
||||
"\n",
|
||||
"**Do not output anything besides the range**\n",
|
||||
"'''"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "d2e4a855-f327-4b3d-ad0b-ed997e720639",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>onetsoc_code</th>\n",
|
||||
" <th>task_id</th>\n",
|
||||
" <th>task</th>\n",
|
||||
" <th>occupation_title</th>\n",
|
||||
" <th>occupation_description</th>\n",
|
||||
" <th>frequency_category_1</th>\n",
|
||||
" <th>frequency_category_2</th>\n",
|
||||
" <th>frequency_category_3</th>\n",
|
||||
" <th>frequency_category_4</th>\n",
|
||||
" <th>frequency_category_5</th>\n",
|
||||
" <th>frequency_category_6</th>\n",
|
||||
" <th>frequency_category_7</th>\n",
|
||||
" <th>importance_average</th>\n",
|
||||
" <th>relevance_average</th>\n",
|
||||
" <th>OCC_CODE</th>\n",
|
||||
" <th>TOT_EMP</th>\n",
|
||||
" <th>H_MEAN</th>\n",
|
||||
" <th>A_MEAN</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>11-1011.00</td>\n",
|
||||
" <td>8823</td>\n",
|
||||
" <td>Direct or coordinate an organization's financi...</td>\n",
|
||||
" <td>Chief Executives</td>\n",
|
||||
" <td>Determine and formulate policies and provide o...</td>\n",
|
||||
" <td>5.92</td>\n",
|
||||
" <td>15.98</td>\n",
|
||||
" <td>29.68</td>\n",
|
||||
" <td>21.18</td>\n",
|
||||
" <td>19.71</td>\n",
|
||||
" <td>4.91</td>\n",
|
||||
" <td>2.63</td>\n",
|
||||
" <td>4.52</td>\n",
|
||||
" <td>74.44</td>\n",
|
||||
" <td>11-1011</td>\n",
|
||||
" <td>211230.0</td>\n",
|
||||
" <td>124.47</td>\n",
|
||||
" <td>258900</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>11-1011.00</td>\n",
|
||||
" <td>8824</td>\n",
|
||||
" <td>Confer with board members, organization offici...</td>\n",
|
||||
" <td>Chief Executives</td>\n",
|
||||
" <td>Determine and formulate policies and provide o...</td>\n",
|
||||
" <td>1.42</td>\n",
|
||||
" <td>14.44</td>\n",
|
||||
" <td>27.31</td>\n",
|
||||
" <td>25.52</td>\n",
|
||||
" <td>26.88</td>\n",
|
||||
" <td>2.52</td>\n",
|
||||
" <td>1.90</td>\n",
|
||||
" <td>4.32</td>\n",
|
||||
" <td>81.71</td>\n",
|
||||
" <td>11-1011</td>\n",
|
||||
" <td>211230.0</td>\n",
|
||||
" <td>124.47</td>\n",
|
||||
" <td>258900</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>11-1011.00</td>\n",
|
||||
" <td>8827</td>\n",
|
||||
" <td>Prepare budgets for approval, including those ...</td>\n",
|
||||
" <td>Chief Executives</td>\n",
|
||||
" <td>Determine and formulate policies and provide o...</td>\n",
|
||||
" <td>15.50</td>\n",
|
||||
" <td>38.21</td>\n",
|
||||
" <td>32.73</td>\n",
|
||||
" <td>5.15</td>\n",
|
||||
" <td>5.25</td>\n",
|
||||
" <td>0.19</td>\n",
|
||||
" <td>2.98</td>\n",
|
||||
" <td>4.30</td>\n",
|
||||
" <td>93.41</td>\n",
|
||||
" <td>11-1011</td>\n",
|
||||
" <td>211230.0</td>\n",
|
||||
" <td>124.47</td>\n",
|
||||
" <td>258900</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>11-1011.00</td>\n",
|
||||
" <td>8826</td>\n",
|
||||
" <td>Direct, plan, or implement policies, objective...</td>\n",
|
||||
" <td>Chief Executives</td>\n",
|
||||
" <td>Determine and formulate policies and provide o...</td>\n",
|
||||
" <td>3.03</td>\n",
|
||||
" <td>17.33</td>\n",
|
||||
" <td>20.30</td>\n",
|
||||
" <td>18.10</td>\n",
|
||||
" <td>33.16</td>\n",
|
||||
" <td>2.01</td>\n",
|
||||
" <td>6.07</td>\n",
|
||||
" <td>4.24</td>\n",
|
||||
" <td>97.79</td>\n",
|
||||
" <td>11-1011</td>\n",
|
||||
" <td>211230.0</td>\n",
|
||||
" <td>124.47</td>\n",
|
||||
" <td>258900</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>11-1011.00</td>\n",
|
||||
" <td>8834</td>\n",
|
||||
" <td>Prepare or present reports concerning activiti...</td>\n",
|
||||
" <td>Chief Executives</td>\n",
|
||||
" <td>Determine and formulate policies and provide o...</td>\n",
|
||||
" <td>1.98</td>\n",
|
||||
" <td>14.06</td>\n",
|
||||
" <td>42.60</td>\n",
|
||||
" <td>21.24</td>\n",
|
||||
" <td>13.18</td>\n",
|
||||
" <td>6.24</td>\n",
|
||||
" <td>0.70</td>\n",
|
||||
" <td>4.17</td>\n",
|
||||
" <td>92.92</td>\n",
|
||||
" <td>11-1011</td>\n",
|
||||
" <td>211230.0</td>\n",
|
||||
" <td>124.47</td>\n",
|
||||
" <td>258900</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>17634</th>\n",
|
||||
" <td>53-7121.00</td>\n",
|
||||
" <td>12807</td>\n",
|
||||
" <td>Unload cars containing liquids by connecting h...</td>\n",
|
||||
" <td>Tank Car, Truck, and Ship Loaders</td>\n",
|
||||
" <td>Load and unload chemicals and bulk solids, suc...</td>\n",
|
||||
" <td>6.05</td>\n",
|
||||
" <td>29.21</td>\n",
|
||||
" <td>6.88</td>\n",
|
||||
" <td>13.95</td>\n",
|
||||
" <td>27.65</td>\n",
|
||||
" <td>7.93</td>\n",
|
||||
" <td>8.34</td>\n",
|
||||
" <td>4.08</td>\n",
|
||||
" <td>64.04</td>\n",
|
||||
" <td>53-7121</td>\n",
|
||||
" <td>11400.0</td>\n",
|
||||
" <td>29.1</td>\n",
|
||||
" <td>60530</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>17635</th>\n",
|
||||
" <td>53-7121.00</td>\n",
|
||||
" <td>12804</td>\n",
|
||||
" <td>Clean interiors of tank cars or tank trucks, u...</td>\n",
|
||||
" <td>Tank Car, Truck, and Ship Loaders</td>\n",
|
||||
" <td>Load and unload chemicals and bulk solids, suc...</td>\n",
|
||||
" <td>1.47</td>\n",
|
||||
" <td>6.33</td>\n",
|
||||
" <td>21.70</td>\n",
|
||||
" <td>25.69</td>\n",
|
||||
" <td>32.35</td>\n",
|
||||
" <td>12.47</td>\n",
|
||||
" <td>0.00</td>\n",
|
||||
" <td>4.02</td>\n",
|
||||
" <td>44.33</td>\n",
|
||||
" <td>53-7121</td>\n",
|
||||
" <td>11400.0</td>\n",
|
||||
" <td>29.1</td>\n",
|
||||
" <td>60530</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>17636</th>\n",
|
||||
" <td>53-7121.00</td>\n",
|
||||
" <td>12803</td>\n",
|
||||
" <td>Lower gauge rods into tanks or read meters to ...</td>\n",
|
||||
" <td>Tank Car, Truck, and Ship Loaders</td>\n",
|
||||
" <td>Load and unload chemicals and bulk solids, suc...</td>\n",
|
||||
" <td>4.52</td>\n",
|
||||
" <td>1.76</td>\n",
|
||||
" <td>4.65</td>\n",
|
||||
" <td>17.81</td>\n",
|
||||
" <td>37.42</td>\n",
|
||||
" <td>23.31</td>\n",
|
||||
" <td>10.55</td>\n",
|
||||
" <td>3.88</td>\n",
|
||||
" <td>65.00</td>\n",
|
||||
" <td>53-7121</td>\n",
|
||||
" <td>11400.0</td>\n",
|
||||
" <td>29.1</td>\n",
|
||||
" <td>60530</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>17637</th>\n",
|
||||
" <td>53-7121.00</td>\n",
|
||||
" <td>12805</td>\n",
|
||||
" <td>Operate conveyors and equipment to transfer gr...</td>\n",
|
||||
" <td>Tank Car, Truck, and Ship Loaders</td>\n",
|
||||
" <td>Load and unload chemicals and bulk solids, suc...</td>\n",
|
||||
" <td>6.97</td>\n",
|
||||
" <td>12.00</td>\n",
|
||||
" <td>2.52</td>\n",
|
||||
" <td>5.90</td>\n",
|
||||
" <td>35.48</td>\n",
|
||||
" <td>22.08</td>\n",
|
||||
" <td>15.05</td>\n",
|
||||
" <td>3.87</td>\n",
|
||||
" <td>47.90</td>\n",
|
||||
" <td>53-7121</td>\n",
|
||||
" <td>11400.0</td>\n",
|
||||
" <td>29.1</td>\n",
|
||||
" <td>60530</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>17638</th>\n",
|
||||
" <td>53-7121.00</td>\n",
|
||||
" <td>12810</td>\n",
|
||||
" <td>Perform general warehouse activities, such as ...</td>\n",
|
||||
" <td>Tank Car, Truck, and Ship Loaders</td>\n",
|
||||
" <td>Load and unload chemicals and bulk solids, suc...</td>\n",
|
||||
" <td>5.91</td>\n",
|
||||
" <td>10.85</td>\n",
|
||||
" <td>6.46</td>\n",
|
||||
" <td>14.46</td>\n",
|
||||
" <td>34.14</td>\n",
|
||||
" <td>16.39</td>\n",
|
||||
" <td>11.78</td>\n",
|
||||
" <td>3.53</td>\n",
|
||||
" <td>47.84</td>\n",
|
||||
" <td>53-7121</td>\n",
|
||||
" <td>11400.0</td>\n",
|
||||
" <td>29.1</td>\n",
|
||||
" <td>60530</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>17639 rows × 18 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" onetsoc_code task_id \\\n",
|
||||
"0 11-1011.00 8823 \n",
|
||||
"1 11-1011.00 8824 \n",
|
||||
"2 11-1011.00 8827 \n",
|
||||
"3 11-1011.00 8826 \n",
|
||||
"4 11-1011.00 8834 \n",
|
||||
"... ... ... \n",
|
||||
"17634 53-7121.00 12807 \n",
|
||||
"17635 53-7121.00 12804 \n",
|
||||
"17636 53-7121.00 12803 \n",
|
||||
"17637 53-7121.00 12805 \n",
|
||||
"17638 53-7121.00 12810 \n",
|
||||
"\n",
|
||||
" task \\\n",
|
||||
"0 Direct or coordinate an organization's financi... \n",
|
||||
"1 Confer with board members, organization offici... \n",
|
||||
"2 Prepare budgets for approval, including those ... \n",
|
||||
"3 Direct, plan, or implement policies, objective... \n",
|
||||
"4 Prepare or present reports concerning activiti... \n",
|
||||
"... ... \n",
|
||||
"17634 Unload cars containing liquids by connecting h... \n",
|
||||
"17635 Clean interiors of tank cars or tank trucks, u... \n",
|
||||
"17636 Lower gauge rods into tanks or read meters to ... \n",
|
||||
"17637 Operate conveyors and equipment to transfer gr... \n",
|
||||
"17638 Perform general warehouse activities, such as ... \n",
|
||||
"\n",
|
||||
" occupation_title \\\n",
|
||||
"0 Chief Executives \n",
|
||||
"1 Chief Executives \n",
|
||||
"2 Chief Executives \n",
|
||||
"3 Chief Executives \n",
|
||||
"4 Chief Executives \n",
|
||||
"... ... \n",
|
||||
"17634 Tank Car, Truck, and Ship Loaders \n",
|
||||
"17635 Tank Car, Truck, and Ship Loaders \n",
|
||||
"17636 Tank Car, Truck, and Ship Loaders \n",
|
||||
"17637 Tank Car, Truck, and Ship Loaders \n",
|
||||
"17638 Tank Car, Truck, and Ship Loaders \n",
|
||||
"\n",
|
||||
" occupation_description \\\n",
|
||||
"0 Determine and formulate policies and provide o... \n",
|
||||
"1 Determine and formulate policies and provide o... \n",
|
||||
"2 Determine and formulate policies and provide o... \n",
|
||||
"3 Determine and formulate policies and provide o... \n",
|
||||
"4 Determine and formulate policies and provide o... \n",
|
||||
"... ... \n",
|
||||
"17634 Load and unload chemicals and bulk solids, suc... \n",
|
||||
"17635 Load and unload chemicals and bulk solids, suc... \n",
|
||||
"17636 Load and unload chemicals and bulk solids, suc... \n",
|
||||
"17637 Load and unload chemicals and bulk solids, suc... \n",
|
||||
"17638 Load and unload chemicals and bulk solids, suc... \n",
|
||||
"\n",
|
||||
" frequency_category_1 frequency_category_2 frequency_category_3 \\\n",
|
||||
"0 5.92 15.98 29.68 \n",
|
||||
"1 1.42 14.44 27.31 \n",
|
||||
"2 15.50 38.21 32.73 \n",
|
||||
"3 3.03 17.33 20.30 \n",
|
||||
"4 1.98 14.06 42.60 \n",
|
||||
"... ... ... ... \n",
|
||||
"17634 6.05 29.21 6.88 \n",
|
||||
"17635 1.47 6.33 21.70 \n",
|
||||
"17636 4.52 1.76 4.65 \n",
|
||||
"17637 6.97 12.00 2.52 \n",
|
||||
"17638 5.91 10.85 6.46 \n",
|
||||
"\n",
|
||||
" frequency_category_4 frequency_category_5 frequency_category_6 \\\n",
|
||||
"0 21.18 19.71 4.91 \n",
|
||||
"1 25.52 26.88 2.52 \n",
|
||||
"2 5.15 5.25 0.19 \n",
|
||||
"3 18.10 33.16 2.01 \n",
|
||||
"4 21.24 13.18 6.24 \n",
|
||||
"... ... ... ... \n",
|
||||
"17634 13.95 27.65 7.93 \n",
|
||||
"17635 25.69 32.35 12.47 \n",
|
||||
"17636 17.81 37.42 23.31 \n",
|
||||
"17637 5.90 35.48 22.08 \n",
|
||||
"17638 14.46 34.14 16.39 \n",
|
||||
"\n",
|
||||
" frequency_category_7 importance_average relevance_average OCC_CODE \\\n",
|
||||
"0 2.63 4.52 74.44 11-1011 \n",
|
||||
"1 1.90 4.32 81.71 11-1011 \n",
|
||||
"2 2.98 4.30 93.41 11-1011 \n",
|
||||
"3 6.07 4.24 97.79 11-1011 \n",
|
||||
"4 0.70 4.17 92.92 11-1011 \n",
|
||||
"... ... ... ... ... \n",
|
||||
"17634 8.34 4.08 64.04 53-7121 \n",
|
||||
"17635 0.00 4.02 44.33 53-7121 \n",
|
||||
"17636 10.55 3.88 65.00 53-7121 \n",
|
||||
"17637 15.05 3.87 47.90 53-7121 \n",
|
||||
"17638 11.78 3.53 47.84 53-7121 \n",
|
||||
"\n",
|
||||
" TOT_EMP H_MEAN A_MEAN \n",
|
||||
"0 211230.0 124.47 258900 \n",
|
||||
"1 211230.0 124.47 258900 \n",
|
||||
"2 211230.0 124.47 258900 \n",
|
||||
"3 211230.0 124.47 258900 \n",
|
||||
"4 211230.0 124.47 258900 \n",
|
||||
"... ... ... ... \n",
|
||||
"17634 11400.0 29.1 60530 \n",
|
||||
"17635 11400.0 29.1 60530 \n",
|
||||
"17636 11400.0 29.1 60530 \n",
|
||||
"17637 11400.0 29.1 60530 \n",
|
||||
"17638 11400.0 29.1 60530 \n",
|
||||
"\n",
|
||||
"[17639 rows x 18 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df_oesm_detailed = df_oesm[df_oesm['O_GROUP'] == 'detailed'][['OCC_CODE', 'TOT_EMP', 'H_MEAN', 'A_MEAN']].copy()\n",
|
||||
"df_enriched_trs['occ_code_join'] = df_enriched_trs['onetsoc_code'].str[:7]\n",
|
||||
"df = pd.merge(\n",
|
||||
" df_enriched_trs,\n",
|
||||
" df_oesm_detailed,\n",
|
||||
" left_on='occ_code_join',\n",
|
||||
" right_on='OCC_CODE',\n",
|
||||
" how='left'\n",
|
||||
")\n",
|
||||
"df = df.drop(columns=['occ_code_join'])\n",
|
||||
"df"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "9be7acb5-2374-4f61-bba3-13b0077c0bd2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Task: Identify, evaluate and recommend hardware or software technologies to achieve desired database performance.\n",
|
||||
"Occupation Description: Design strategies for enterprise databases, data warehouse systems, and multidimensional networks. Set standards for database operations, programming, query processes, and security. Model, design, and construct large relational databases or data warehouses. Create and optimize data models for warehouse infrastructure and workflow. Integrate new systems with existing warehouse structure and refine system performance and functionality.\n",
|
||||
"Occupation Title: Database Architects\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"119976"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df_merged = pd \\\n",
|
||||
" .merge(left=df_enriched_trs, right=df_remote_status[['O*NET-SOC Code', 'Remote']], how='left', left_on='onetsoc_code', right_on='O*NET-SOC Code') \\\n",
|
||||
" .drop(columns=['O*NET-SOC Code']) \\\n",
|
||||
" .rename(columns={'Remote': 'remote'}) \\\n",
|
||||
" .rename(columns=FREQUENCY_MAP) \\\n",
|
||||
" .query('remote == \"remote\" and importance_average >= 3 and relevance_average > 50')\n",
|
||||
"\n",
|
||||
"row = df_merged.iloc[30000]\n",
|
||||
"print('Task: ', row['task'])\n",
|
||||
"print('Occupation Description: ', row['occupation_description'])\n",
|
||||
"print('Occupation Title: ', row['occupation_title'])\n",
|
||||
"\n",
|
||||
"len(df_merged)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "fd9ac1c3-6d17-4764-8a2e-c84d4019bd9e",
|
||||
"metadata": {
|
||||
"jp-MarkdownHeadingCollapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Cross-reference with BLS OEWS\n",
|
||||
"# It doesn't really make sense to have it per-task, we only need it per-occupation...\n",
|
||||
"df_oesm_detailed = df_oesm[df_oesm['O_GROUP'] == 'detailed'][['OCC_CODE', 'TOT_EMP', 'H_MEAN', 'A_MEAN']].copy()\n",
|
||||
"df_merged['occ_code_join'] = df_merged['onetsoc_code'].str[:7]\n",
|
||||
"df_merged = pd.merge(\n",
|
||||
" df_merged,\n",
|
||||
" df_oesm_detailed,\n",
|
||||
" left_on='occ_code_join',\n",
|
||||
" right_on='OCC_CODE',\n",
|
||||
" how='left'\n",
|
||||
")\n",
|
||||
"df_merged = df_merged.drop(columns=['occ_code_join'])\n",
|
||||
"df_merged"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 76,
|
||||
"id": "08f45d91-039d-4ec0-94a2-f305a3312e6a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Why did the scarecrow win an award?\n",
|
||||
"\n",
|
||||
"Because he was outstanding in his field!\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"response = oai.chat.completions.create(messages=[{\"role\": \"user\", \"content\": \"Tell me a joke\"}], model=\"gpt-4.1-2025-04-14\", max_tokens=100, temperature=0.7, n=1, stop=None)\n",
|
||||
"joke = response.choices[0].message.content.strip()\n",
|
||||
"print(joke)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
24
archive/loss.py
Normal file
24
archive/loss.py
Normal file
|
@ -0,0 +1,24 @@
|
|||
def calc_loss(df):
    """
    Geometric-mean log error between prediction bands and golden bands.
    Assumes all columns are strictly positive.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns:
        - 'pred_lower', 'pred_upper'
        - 'golden_lower', 'golden_upper'

    Returns
    -------
    float
        Scalar loss value (the smaller, the better). The minimum is 1.0,
        reached when every predicted bound equals its golden bound.
    """
    # Imported here because this module has no top-level imports; without it
    # the `np` references below raise NameError.
    import numpy as np

    # Element-wise absolute log-ratios: 0 for an exact match, symmetric for
    # over- and under-estimation by the same factor.
    loss_lower = np.abs(np.log(df["pred_lower"] / df["golden_lower"]))
    loss_upper = np.abs(np.log(df["pred_upper"] / df["golden_upper"]))

    # Average the two means, then exponentiate to get back a multiplicative factor
    loss = np.exp(0.5 * (loss_lower.mean() + loss_upper.mean()))
    return loss
|
334
archive/onet_explorer_app.py
Normal file
334
archive/onet_explorer_app.py
Normal file
|
@ -0,0 +1,334 @@
|
|||
import streamlit as st
|
||||
import sqlite3
|
||||
import pandas as pd
|
||||
import graphviz
|
||||
import textwrap
|
||||
|
||||
# --- Database Setup ---
|
||||
DB_FILE = "onet.database"
|
||||
|
||||
|
||||
@st.cache_resource
def get_db_connection():
    """Open the SQLite database at ``DB_FILE`` and return the connection.

    Returns
    -------
    sqlite3.Connection
        Connection whose rows are ``sqlite3.Row`` objects, so columns can be
        accessed by name.
    """
    # check_same_thread=False: the connection is cached by st.cache_resource
    # and handed back on later calls, which may come from a different thread
    # than the one that created it. With sqlite3's default same-thread check
    # such reuse raises sqlite3.ProgrammingError.
    conn = sqlite3.connect(DB_FILE, check_same_thread=False)
    conn.row_factory = sqlite3.Row  # Access columns by name
    return conn
|
||||
|
||||
|
||||
@st.cache_data
def get_occupations(_conn):
    """Load the code and title of every occupation, ordered by title.

    Parameters
    ----------
    _conn : database connection passed straight to ``pd.read_sql_query``.

    Returns
    -------
    pandas.DataFrame
        Columns ``onetsoc_code`` and ``title``.
    """
    sql = "SELECT onetsoc_code, title FROM occupation_data ORDER BY title"
    return pd.read_sql_query(sql, _conn)
|
||||
|
||||
|
||||
@st.cache_data
def get_iwas_for_occupation(_conn, onetsoc_code):
    """
    Look up the IWAs attached to one occupation.

    The occupation's rows in ``work_activities`` are joined to
    ``iwa_reference`` on ``element_id``; each distinct IWA reached that way
    is returned once, ordered by title.

    Parameters
    ----------
    _conn : database connection passed straight to ``pd.read_sql_query``.
    onetsoc_code : occupation code bound to the query's single placeholder.

    Returns
    -------
    pandas.DataFrame
        Columns ``iwa_id`` and ``iwa_title``.
    """
    sql = """
        SELECT DISTINCT
            ir.iwa_id,
            ir.iwa_title
        FROM work_activities wa
        JOIN iwa_reference ir ON wa.element_id = ir.element_id
        WHERE wa.onetsoc_code = ?
        ORDER BY ir.iwa_title;
    """
    return pd.read_sql_query(sql, _conn, params=(onetsoc_code,))
|
||||
|
||||
|
||||
@st.cache_data
def get_dwas_for_iwas(_conn, iwa_ids):
    """Look up the DWAs that belong to any of the given IWA IDs.

    Parameters
    ----------
    _conn : database connection passed straight to ``pd.read_sql_query``.
    iwa_ids : sequence of IWA IDs; each one is bound to its own placeholder.

    Returns
    -------
    pandas.DataFrame
        Columns ``dwa_id``, ``dwa_title`` and ``iwa_id``, ordered by
        ``dwa_title``. A bare, column-less DataFrame when ``iwa_ids`` is empty.
    """
    # Nothing to look up: skip the query (an empty IN () list is not valid SQL).
    if not iwa_ids:
        return pd.DataFrame()

    # Only "?" markers are interpolated into the SQL text; the IDs themselves
    # are passed as bound parameters.
    markers = ",".join(["?"] * len(iwa_ids))
    sql = f"""
        SELECT DISTINCT
            dr.dwa_id,
            dr.dwa_title,
            dr.iwa_id -- to link back to the IWA
        FROM dwa_reference dr
        WHERE dr.iwa_id IN ({markers})
        ORDER BY dr.dwa_title;
    """
    return pd.read_sql_query(sql, _conn, params=iwa_ids)
|
||||
|
||||
|
||||
@st.cache_data
def get_tasks_for_dwas(_conn, onetsoc_code, dwa_ids):
    """Fetches tasks for a given occupation and list of DWA IDs.

    Parameters
    ----------
    _conn : database connection passed straight to ``pd.read_sql_query``.
    onetsoc_code : occupation code the task statements must belong to.
    dwa_ids : sequence (list, tuple, ...) of DWA IDs to match.

    Returns
    -------
    pandas.DataFrame
        Columns ``task_id``, ``task`` and ``dwa_id``, ordered by ``task``.
        A bare, column-less DataFrame when ``dwa_ids`` is empty.
    """
    if not dwa_ids:
        return pd.DataFrame()
    placeholders = ",".join(
        "?" for _ in dwa_ids
    )  # Create one placeholder for each DWA ID
    query = f"""
        SELECT DISTINCT
            ts.task_id,
            ts.task,
            t2d.dwa_id -- to link back to the DWA
        FROM task_statements ts
        JOIN tasks_to_dwas t2d ON ts.task_id = t2d.task_id
        WHERE ts.onetsoc_code = ? AND t2d.dwa_id IN ({placeholders})
        ORDER BY ts.task;
    """
    # The parameters list should first contain onetsoc_code, then all DWA IDs.
    # Unpacking (rather than list concatenation) also accepts tuples and other
    # non-list sequences, as get_dwas_for_iwas does for its ID argument.
    params = [onetsoc_code, *dwa_ids]
    df = pd.read_sql_query(query, _conn, params=params)
    return df
|
||||
|
||||
|
||||
def smart_wrap(text, width=40):
    """Wrap ``text`` to ``width`` columns for display inside a graph node.

    Existing line breaks in ``text`` are treated as hard paragraph breaks:
    each paragraph is wrapped on its own and the pieces are re-joined with
    ``"\\n"``. Wrapping the whole string in one pass with
    ``replace_whitespace=False`` lets an embedded newline count as an
    ordinary character, which yields stray blank lines and lines of the
    wrong length for labels such as ``"Title\\n(ID: ...)"``.

    Args:
        text: Label text, possibly containing ``"\\n"``.
        width: Maximum line length in characters.

    Returns:
        The wrapped text. Words longer than ``width`` are split, whitespace
        at wrap points is dropped, and empty paragraphs are preserved as
        empty lines.
    """
    wrapped_lines = []
    for paragraph in text.splitlines():
        # textwrap.wrap returns [] for empty/whitespace-only input; keep the
        # blank line so intentional spacing in the label survives.
        pieces = textwrap.wrap(paragraph, width=width, break_long_words=True)
        wrapped_lines.extend(pieces or [""])
    return "\n".join(wrapped_lines)
|
||||
|
||||
|
||||
# --- Streamlit App Layout ---
st.set_page_config(layout="wide")

# Probe the database file, then connect. The file is opened (and closed
# immediately) in binary mode purely so that a missing file surfaces as
# FileNotFoundError before get_db_connection() runs -- presumably because
# connecting alone would not report a missing file; confirm against
# get_db_connection. Either failure shows a message and halts the script.
try:
    open(DB_FILE, "rb").close()
    conn = get_db_connection()
except FileNotFoundError:
    st.error(
        f"Database file '{DB_FILE}' not found. Please ensure it is in the same directory as the script."
    )
    st.stop()
except sqlite3.Error as db_error:
    st.error(f"Error connecting to or reading the database '{DB_FILE}': {db_error}")
    st.info(
        "Please ensure the database file is a valid SQLite database and not corrupted."
    )
    st.stop()

# Page heading and a short description of what the app shows.
st.title("O*NET Occupation Hierarchy Explorer")
intro_markdown = """
This application visualizes the relationships between Occupations, Intermediate Work Activities (IWAs),
Detailed Work Activities (DWAs), and Task Statements from the O*NET database.
Select an occupation from the control panel on the left to view its hierarchical breakdown.
"""
st.markdown(intro_markdown)
|
||||
|
||||
# --- Control panel (left column): occupation search and selection ---
# The page is split 30/70: col1 hosts the controls, col2 the graph.
col1, col2 = st.columns([0.3, 0.7], gap="large")

with col1:
    st.header("Control Panel")
    occupations_df = get_occupations(conn)

    # Nothing to choose from: tell the user and end this script run.
    if occupations_df.empty:
        st.warning("No occupations found in the database.")
        st.stop()

    # Label shown in the selectbox: "<title> (<onetsoc_code>)".
    occupations_df["display_name"] = (
        occupations_df["title"] + " (" + occupations_df["onetsoc_code"] + ")"
    )

    search_term = st.text_input(
        "Search for an occupation:", placeholder="E.g., Software Developer"
    )

    if search_term:
        # Treat the typed text as a literal, case-insensitive substring.
        # regex=False avoids hand-escaping metacharacters: input such as
        # "C++", "*" or a trailing backslash would otherwise raise re.error
        # or match something other than what was typed.
        title_matches = occupations_df["title"].str.contains(
            search_term, case=False, regex=False, na=False
        )
        code_matches = occupations_df["onetsoc_code"].str.contains(
            search_term, case=False, regex=False, na=False
        )
        filtered_occupations = occupations_df[title_matches | code_matches]
    else:
        filtered_occupations = occupations_df

    if not filtered_occupations.empty:
        # Sort so the selectbox order is stable between reruns.
        filtered_occupations_sorted = filtered_occupations.sort_values("display_name")
        selected_occupation_display_name = st.selectbox(
            "Choose an occupation:",
            options=filtered_occupations_sorted["display_name"],
            index=0,  # Default to the first item
        )

        # Map the chosen label back to its code and title.
        selected_row = occupations_df[
            occupations_df["display_name"] == selected_occupation_display_name
        ].iloc[0]
        selected_onetsoc_code = selected_row["onetsoc_code"]
        selected_occupation_title = selected_row["title"]
    else:
        st.warning("No occupations match your search term.")
        # None tells the graph section that there is nothing to draw.
        selected_onetsoc_code = None
        selected_occupation_title = None
|
||||
|
||||
# --- Main Area for Graph Display ---
# Right-hand column: builds an Occupation -> IWA -> DWA -> Task graph for the
# occupation chosen in the control panel and renders it with Graphviz.
with col2:
    st.header("Occupation Graph")

    if not selected_onetsoc_code:
        st.info("Select an occupation from the control panel to see its graph.")
    else:
        st.subheader(
            f"Displaying: {selected_occupation_title} ({selected_onetsoc_code})"
        )

        iwas_df = get_iwas_for_occupation(conn, selected_onetsoc_code)

        if iwas_df.empty:
            st.info(
                "No Intermediate Work Activities (IWAs) found directly linked for this occupation."
            )
        else:
            # Left-to-right layout with generous spacing between nodes/ranks.
            layout_attrs = {
                "rankdir": "LR",
                "splines": "spline",
                "concentrate": "false",
                "nodesep": "0.5",
                "ranksep": "0.8",
            }
            # Per-level node appearance.
            occupation_style = {"shape": "ellipse", "style": "filled", "fillcolor": "skyblue"}
            iwa_style = {"shape": "box", "style": "filled", "fillcolor": "khaki"}
            dwa_style = {"shape": "box", "style": "filled", "fillcolor": "lightcoral"}
            task_style = {"shape": "note", "style": "filled", "fillcolor": "lightgray"}

            graph = graphviz.Digraph(
                comment=f"O*NET Hierarchy for {selected_onetsoc_code}"
            )
            graph.attr(**layout_attrs)

            # Root node: the occupation itself. Dots are replaced in node ids.
            occ_node_id = "occ_" + selected_onetsoc_code.replace(".", "_")
            occ_label = smart_wrap(
                f"Occupation: {selected_occupation_title}\n({selected_onetsoc_code})",
                width=30,
            )
            graph.node(occ_node_id, label=occ_label, **occupation_style)

            # Load the two lower levels up front; tasks are only requested
            # for DWAs that were actually found.
            iwa_ids = iwas_df["iwa_id"].tolist()
            dwas_df = get_dwas_for_iwas(conn, iwa_ids)
            dwa_ids_for_tasks = (
                [] if dwas_df.empty else dwas_df["dwa_id"].unique().tolist()
            )
            tasks_df = get_tasks_for_dwas(
                conn, selected_onetsoc_code, dwa_ids_for_tasks
            )

            for _, iwa in iwas_df.iterrows():
                iwa_id = iwa["iwa_id"]
                iwa_node_id = "iwa_" + str(iwa_id).replace(".", "_")
                iwa_label = smart_wrap(
                    f"IWA: {iwa['iwa_title']}\n(ID: {iwa_id})", width=35
                )
                graph.node(iwa_node_id, label=iwa_label, **iwa_style)
                graph.edge(occ_node_id, iwa_node_id)

                # DWAs that belong to this IWA.
                dwas_of_iwa = dwas_df[dwas_df["iwa_id"] == iwa_id]
                for _, dwa in dwas_of_iwa.iterrows():
                    dwa_id = dwa["dwa_id"]
                    dwa_node_id = "dwa_" + str(dwa_id).replace(".", "_")
                    dwa_label = smart_wrap(
                        f"DWA: {dwa['dwa_title']}\n(ID: {dwa_id})",
                        width=40,
                    )
                    graph.node(dwa_node_id, label=dwa_label, **dwa_style)
                    graph.edge(iwa_node_id, dwa_node_id)

                    # Tasks of this occupation that map to this DWA.
                    tasks_of_dwa = tasks_df[tasks_df["dwa_id"] == dwa_id]
                    for _, task in tasks_of_dwa.iterrows():
                        # Keep only the part before any decimal point so a
                        # value rendered like "123.0" becomes "123".
                        task_id_str = str(task["task_id"]).split(".")[0]
                        task_node_id = "task_" + task_id_str
                        task_label = smart_wrap(
                            f"Task: {task['task']}\n(ID: {task_id_str})", width=50
                        )
                        graph.node(task_node_id, label=task_label, **task_style)
                        graph.edge(dwa_node_id, task_node_id)

            # NOTE(review): at this point the graph already holds the layout
            # attributes, the occupation node and at least one IWA node, so
            # this guard looks unreachable -- confirm before relying on it.
            graph_is_trivial = not graph.body or len(graph.body) <= 1
            if graph_is_trivial:
                st.info(
                    "No hierarchical data (IWAs, DWAs, Tasks) to display for this occupation after initial selection."
                )
            else:
                try:
                    st.graphviz_chart(graph, use_container_width=True)

                    # Raw tables behind the graph, collapsed by default.
                    with st.expander("View Data Tables for Selected Occupation"):
                        st.markdown("##### Intermediate Work Activities (IWAs)")
                        st.dataframe(iwas_df, use_container_width=True)
                        if not dwas_df.empty:
                            st.markdown("##### Detailed Work Activities (DWAs)")
                            st.dataframe(dwas_df, use_container_width=True)
                        if not tasks_df.empty:
                            st.markdown("##### Task Statements")
                            st.dataframe(tasks_df, use_container_width=True)
                except Exception as render_error:
                    # Show the failure plus the DOT source so the graph can
                    # be inspected outside the app.
                    st.error(
                        f"Could not render the graph. Graphviz might not be installed correctly or there's an issue with the graph data: {render_error}"
                    )
                    st.text("Graphviz DOT source (for debugging):")
                    st.code(graph.source, language="dot")
|
||||
|
||||
# Instructions to run the app:
|
||||
# 1. Save this code as a Python file (e.g., onet_explorer_app.py).
|
||||
# 2. Ensure the 'onet.database' file is in the same directory.
|
||||
# 3. Install the required libraries: pip install streamlit pandas graphviz
|
||||
# 4. Open your terminal or command prompt, navigate to the directory, and run:
|
||||
# streamlit run onet_explorer_app.py
|
352
archive/schema.txt
Normal file
352
archive/schema.txt
Normal file
|
@ -0,0 +1,352 @@
|
|||
-- Core reference tables. Each has a single-column primary key that other
-- tables in this schema point at through FOREIGN KEY clauses.

-- One row per element_id, with its name and description.
CREATE TABLE content_model_reference (
    element_id   CHARACTER VARYING(20) NOT NULL,
    element_name CHARACTER VARYING(150) NOT NULL,
    description  CHARACTER VARYING(1500) NOT NULL,
    PRIMARY KEY (element_id)
);

-- One row per job_zone value, with descriptive text columns.
CREATE TABLE job_zone_reference (
    job_zone     DECIMAL(1,0) NOT NULL,
    name         CHARACTER VARYING(50) NOT NULL,
    experience   CHARACTER VARYING(300) NOT NULL,
    education    CHARACTER VARYING(500) NOT NULL,
    job_training CHARACTER VARYING(300) NOT NULL,
    examples     CHARACTER VARYING(500) NOT NULL,
    svp_range    CHARACTER VARYING(25) NOT NULL,
    PRIMARY KEY (job_zone)
);

-- One row per occupation, keyed by onetsoc_code.
CREATE TABLE occupation_data (
    onetsoc_code CHARACTER(10) NOT NULL,
    title        CHARACTER VARYING(150) NOT NULL,
    description  CHARACTER VARYING(1000) NOT NULL,
    PRIMARY KEY (onetsoc_code)
);

-- One row per rating scale, with its name and minimum/maximum values.
CREATE TABLE scales_reference (
    scale_id   CHARACTER VARYING(3) NOT NULL,
    scale_name CHARACTER VARYING(50) NOT NULL,
    minimum    DECIMAL(1,0) NOT NULL,
    maximum    DECIMAL(3,0) NOT NULL,
    PRIMARY KEY (scale_id)
);
|
||||
-- Category, anchor and metadata lookups that hang off the core reference
-- tables (content_model_reference, scales_reference, occupation_data).

-- Category descriptions keyed by (element_id, scale_id, category);
-- referenced by education_training_experience.
CREATE TABLE ete_categories (
    element_id           CHARACTER VARYING(20) NOT NULL,
    scale_id             CHARACTER VARYING(3) NOT NULL,
    category             DECIMAL(3,0) NOT NULL,
    category_description CHARACTER VARYING(1000) NOT NULL,
    PRIMARY KEY (element_id, scale_id, category),
    FOREIGN KEY (element_id) REFERENCES content_model_reference(element_id),
    FOREIGN KEY (scale_id) REFERENCES scales_reference(scale_id)
);

-- Anchor descriptions per element and scale. No primary key is declared.
CREATE TABLE level_scale_anchors (
    element_id         CHARACTER VARYING(20) NOT NULL,
    scale_id           CHARACTER VARYING(3) NOT NULL,
    anchor_value       DECIMAL(3,0) NOT NULL,
    anchor_description CHARACTER VARYING(1000) NOT NULL,
    FOREIGN KEY (element_id) REFERENCES content_model_reference(element_id),
    FOREIGN KEY (scale_id) REFERENCES scales_reference(scale_id)
);

-- Per-occupation metadata items; response, n and percent are nullable.
CREATE TABLE occupation_level_metadata (
    onetsoc_code CHARACTER(10) NOT NULL,
    item         CHARACTER VARYING(150) NOT NULL,
    response     CHARACTER VARYING(75),
    n            DECIMAL(4,0),
    percent      DECIMAL(4,1),
    date_updated DATE NOT NULL,
    FOREIGN KEY (onetsoc_code) REFERENCES occupation_data(onetsoc_code)
);

-- Survey item number recorded for an element/scale pair.
CREATE TABLE survey_booklet_locations (
    element_id         CHARACTER VARYING(20) NOT NULL,
    survey_item_number CHARACTER VARYING(4) NOT NULL,
    scale_id           CHARACTER VARYING(3) NOT NULL,
    FOREIGN KEY (element_id) REFERENCES content_model_reference(element_id),
    FOREIGN KEY (scale_id) REFERENCES scales_reference(scale_id)
);

-- Category descriptions keyed by (scale_id, category);
-- referenced by task_ratings.
CREATE TABLE task_categories (
    scale_id             CHARACTER VARYING(3) NOT NULL,
    category             DECIMAL(3,0) NOT NULL,
    category_description CHARACTER VARYING(1000) NOT NULL,
    PRIMARY KEY (scale_id, category),
    FOREIGN KEY (scale_id) REFERENCES scales_reference(scale_id)
);

-- Category descriptions keyed by (element_id, scale_id, category);
-- referenced by work_context.
CREATE TABLE work_context_categories (
    element_id           CHARACTER VARYING(20) NOT NULL,
    scale_id             CHARACTER VARYING(3) NOT NULL,
    category             DECIMAL(3,0) NOT NULL,
    category_description CHARACTER VARYING(1000) NOT NULL,
    PRIMARY KEY (element_id, scale_id, category),
    FOREIGN KEY (element_id) REFERENCES content_model_reference(element_id),
    FOREIGN KEY (scale_id) REFERENCES scales_reference(scale_id)
);
|
||||
-- Per-occupation data tables. They share a common shape: an occupation code,
-- an element (or job zone), usually a scale, a data_value, optional
-- statistics columns, and date_updated / domain_source. None of them
-- declares a primary key.

CREATE TABLE abilities (
    onetsoc_code       CHARACTER(10) NOT NULL,
    element_id         CHARACTER VARYING(20) NOT NULL,
    scale_id           CHARACTER VARYING(3) NOT NULL,
    data_value         DECIMAL(5,2) NOT NULL,
    n                  DECIMAL(4,0),
    standard_error     DECIMAL(7,4),
    lower_ci_bound     DECIMAL(7,4),
    upper_ci_bound     DECIMAL(7,4),
    recommend_suppress CHARACTER(1),
    not_relevant       CHARACTER(1),
    date_updated       DATE NOT NULL,
    domain_source      CHARACTER VARYING(30) NOT NULL,
    FOREIGN KEY (onetsoc_code) REFERENCES occupation_data(onetsoc_code),
    FOREIGN KEY (element_id) REFERENCES content_model_reference(element_id),
    FOREIGN KEY (scale_id) REFERENCES scales_reference(scale_id)
);

-- Adds a nullable category whose (element_id, scale_id, category) triple
-- references ete_categories.
CREATE TABLE education_training_experience (
    onetsoc_code       CHARACTER(10) NOT NULL,
    element_id         CHARACTER VARYING(20) NOT NULL,
    scale_id           CHARACTER VARYING(3) NOT NULL,
    category           DECIMAL(3,0),
    data_value         DECIMAL(5,2) NOT NULL,
    n                  DECIMAL(4,0),
    standard_error     DECIMAL(7,4),
    lower_ci_bound     DECIMAL(7,4),
    upper_ci_bound     DECIMAL(7,4),
    recommend_suppress CHARACTER(1),
    date_updated       DATE NOT NULL,
    domain_source      CHARACTER VARYING(30) NOT NULL,
    FOREIGN KEY (onetsoc_code) REFERENCES occupation_data(onetsoc_code),
    FOREIGN KEY (element_id) REFERENCES content_model_reference(element_id),
    FOREIGN KEY (scale_id) REFERENCES scales_reference(scale_id),
    FOREIGN KEY (element_id, scale_id, category) REFERENCES ete_categories(element_id, scale_id, category)
);

-- Value only: no statistics columns.
CREATE TABLE interests (
    onetsoc_code  CHARACTER(10) NOT NULL,
    element_id    CHARACTER VARYING(20) NOT NULL,
    scale_id      CHARACTER VARYING(3) NOT NULL,
    data_value    DECIMAL(5,2) NOT NULL,
    date_updated  DATE NOT NULL,
    domain_source CHARACTER VARYING(30) NOT NULL,
    FOREIGN KEY (onetsoc_code) REFERENCES occupation_data(onetsoc_code),
    FOREIGN KEY (element_id) REFERENCES content_model_reference(element_id),
    FOREIGN KEY (scale_id) REFERENCES scales_reference(scale_id)
);

-- Links an occupation to a row of job_zone_reference.
CREATE TABLE job_zones (
    onetsoc_code  CHARACTER(10) NOT NULL,
    job_zone      DECIMAL(1,0) NOT NULL,
    date_updated  DATE NOT NULL,
    domain_source CHARACTER VARYING(30) NOT NULL,
    FOREIGN KEY (onetsoc_code) REFERENCES occupation_data(onetsoc_code),
    FOREIGN KEY (job_zone) REFERENCES job_zone_reference(job_zone)
);

CREATE TABLE knowledge (
    onetsoc_code       CHARACTER(10) NOT NULL,
    element_id         CHARACTER VARYING(20) NOT NULL,
    scale_id           CHARACTER VARYING(3) NOT NULL,
    data_value         DECIMAL(5,2) NOT NULL,
    n                  DECIMAL(4,0),
    standard_error     DECIMAL(7,4),
    lower_ci_bound     DECIMAL(7,4),
    upper_ci_bound     DECIMAL(7,4),
    recommend_suppress CHARACTER(1),
    not_relevant       CHARACTER(1),
    date_updated       DATE NOT NULL,
    domain_source      CHARACTER VARYING(30) NOT NULL,
    FOREIGN KEY (onetsoc_code) REFERENCES occupation_data(onetsoc_code),
    FOREIGN KEY (element_id) REFERENCES content_model_reference(element_id),
    FOREIGN KEY (scale_id) REFERENCES scales_reference(scale_id)
);

CREATE TABLE skills (
    onetsoc_code       CHARACTER(10) NOT NULL,
    element_id         CHARACTER VARYING(20) NOT NULL,
    scale_id           CHARACTER VARYING(3) NOT NULL,
    data_value         DECIMAL(5,2) NOT NULL,
    n                  DECIMAL(4,0),
    standard_error     DECIMAL(7,4),
    lower_ci_bound     DECIMAL(7,4),
    upper_ci_bound     DECIMAL(7,4),
    recommend_suppress CHARACTER(1),
    not_relevant       CHARACTER(1),
    date_updated       DATE NOT NULL,
    domain_source      CHARACTER VARYING(30) NOT NULL,
    FOREIGN KEY (onetsoc_code) REFERENCES occupation_data(onetsoc_code),
    FOREIGN KEY (element_id) REFERENCES content_model_reference(element_id),
    FOREIGN KEY (scale_id) REFERENCES scales_reference(scale_id)
);
|
||||
-- Task statements, keyed by task_id, each tied to one occupation.
-- task_type and incumbents_responding are nullable.
CREATE TABLE task_statements (
    onetsoc_code          CHARACTER(10) NOT NULL,
    task_id               DECIMAL(8,0) NOT NULL,
    task                  CHARACTER VARYING(1000) NOT NULL,
    task_type             CHARACTER VARYING(12),
    incumbents_responding DECIMAL(4,0),
    date_updated          DATE NOT NULL,
    domain_source         CHARACTER VARYING(30) NOT NULL,
    PRIMARY KEY (task_id),
    FOREIGN KEY (onetsoc_code) REFERENCES occupation_data(onetsoc_code)
);

-- Ratings per task and scale. The nullable category, together with scale_id,
-- references task_categories. No primary key is declared.
CREATE TABLE task_ratings (
    onetsoc_code       CHARACTER(10) NOT NULL,
    task_id            DECIMAL(8,0) NOT NULL,
    scale_id           CHARACTER VARYING(3) NOT NULL,
    category           DECIMAL(3,0),
    data_value         DECIMAL(5,2) NOT NULL,
    n                  DECIMAL(4,0),
    standard_error     DECIMAL(7,4),
    lower_ci_bound     DECIMAL(7,4),
    upper_ci_bound     DECIMAL(7,4),
    recommend_suppress CHARACTER(1),
    date_updated       DATE NOT NULL,
    domain_source      CHARACTER VARYING(30) NOT NULL,
    FOREIGN KEY (onetsoc_code) REFERENCES occupation_data(onetsoc_code),
    FOREIGN KEY (task_id) REFERENCES task_statements(task_id),
    FOREIGN KEY (scale_id) REFERENCES scales_reference(scale_id),
    FOREIGN KEY (scale_id, category) REFERENCES task_categories(scale_id, category)
);
|
||||
-- More per-occupation data tables with the same general shape: occupation
-- code, element, scale, data_value, optional statistics columns, and
-- date_updated / domain_source. None of them declares a primary key.

CREATE TABLE work_activities (
    onetsoc_code       CHARACTER(10) NOT NULL,
    element_id         CHARACTER VARYING(20) NOT NULL,
    scale_id           CHARACTER VARYING(3) NOT NULL,
    data_value         DECIMAL(5,2) NOT NULL,
    n                  DECIMAL(4,0),
    standard_error     DECIMAL(7,4),
    lower_ci_bound     DECIMAL(7,4),
    upper_ci_bound     DECIMAL(7,4),
    recommend_suppress CHARACTER(1),
    not_relevant       CHARACTER(1),
    date_updated       DATE NOT NULL,
    domain_source      CHARACTER VARYING(30) NOT NULL,
    FOREIGN KEY (onetsoc_code) REFERENCES occupation_data(onetsoc_code),
    FOREIGN KEY (element_id) REFERENCES content_model_reference(element_id),
    FOREIGN KEY (scale_id) REFERENCES scales_reference(scale_id)
);

-- Adds a nullable category whose (element_id, scale_id, category) triple
-- references work_context_categories.
CREATE TABLE work_context (
    onetsoc_code       CHARACTER(10) NOT NULL,
    element_id         CHARACTER VARYING(20) NOT NULL,
    scale_id           CHARACTER VARYING(3) NOT NULL,
    category           DECIMAL(3,0),
    data_value         DECIMAL(5,2) NOT NULL,
    n                  DECIMAL(4,0),
    standard_error     DECIMAL(7,4),
    lower_ci_bound     DECIMAL(7,4),
    upper_ci_bound     DECIMAL(7,4),
    recommend_suppress CHARACTER(1),
    not_relevant       CHARACTER(1),
    date_updated       DATE NOT NULL,
    domain_source      CHARACTER VARYING(30) NOT NULL,
    FOREIGN KEY (onetsoc_code) REFERENCES occupation_data(onetsoc_code),
    FOREIGN KEY (element_id) REFERENCES content_model_reference(element_id),
    FOREIGN KEY (scale_id) REFERENCES scales_reference(scale_id),
    FOREIGN KEY (element_id, scale_id, category) REFERENCES work_context_categories(element_id, scale_id, category)
);

-- Same statistics columns as work_activities, but without not_relevant.
CREATE TABLE work_styles (
    onetsoc_code       CHARACTER(10) NOT NULL,
    element_id         CHARACTER VARYING(20) NOT NULL,
    scale_id           CHARACTER VARYING(3) NOT NULL,
    data_value         DECIMAL(5,2) NOT NULL,
    n                  DECIMAL(4,0),
    standard_error     DECIMAL(7,4),
    lower_ci_bound     DECIMAL(7,4),
    upper_ci_bound     DECIMAL(7,4),
    recommend_suppress CHARACTER(1),
    date_updated       DATE NOT NULL,
    domain_source      CHARACTER VARYING(30) NOT NULL,
    FOREIGN KEY (onetsoc_code) REFERENCES occupation_data(onetsoc_code),
    FOREIGN KEY (element_id) REFERENCES content_model_reference(element_id),
    FOREIGN KEY (scale_id) REFERENCES scales_reference(scale_id)
);

-- Value only: no statistics columns.
CREATE TABLE work_values (
    onetsoc_code  CHARACTER(10) NOT NULL,
    element_id    CHARACTER VARYING(20) NOT NULL,
    scale_id      CHARACTER VARYING(3) NOT NULL,
    data_value    DECIMAL(5,2) NOT NULL,
    date_updated  DATE NOT NULL,
    domain_source CHARACTER VARYING(30) NOT NULL,
    FOREIGN KEY (onetsoc_code) REFERENCES occupation_data(onetsoc_code),
    FOREIGN KEY (element_id) REFERENCES content_model_reference(element_id),
    FOREIGN KEY (scale_id) REFERENCES scales_reference(scale_id)
);
|
||||
-- Work-activity hierarchy: element_id -> iwa_id -> dwa_id -> task_id.

-- One row per iwa_id, each attached to a content-model element.
CREATE TABLE iwa_reference (
    element_id CHARACTER VARYING(20) NOT NULL,
    iwa_id     CHARACTER VARYING(20) NOT NULL,
    iwa_title  CHARACTER VARYING(150) NOT NULL,
    PRIMARY KEY (iwa_id),
    FOREIGN KEY (element_id) REFERENCES content_model_reference(element_id)
);

-- One row per dwa_id, each attached to exactly one iwa_id.
CREATE TABLE dwa_reference (
    element_id CHARACTER VARYING(20) NOT NULL,
    iwa_id     CHARACTER VARYING(20) NOT NULL,
    dwa_id     CHARACTER VARYING(20) NOT NULL,
    dwa_title  CHARACTER VARYING(150) NOT NULL,
    PRIMARY KEY (dwa_id),
    FOREIGN KEY (element_id) REFERENCES content_model_reference(element_id),
    FOREIGN KEY (iwa_id) REFERENCES iwa_reference(iwa_id)
);

-- Link table between task_statements and dwa_reference.
-- No primary key is declared.
CREATE TABLE tasks_to_dwas (
    onetsoc_code  CHARACTER(10) NOT NULL,
    task_id       DECIMAL(8,0) NOT NULL,
    dwa_id        CHARACTER VARYING(20) NOT NULL,
    date_updated  DATE NOT NULL,
    domain_source CHARACTER VARYING(30) NOT NULL,
    FOREIGN KEY (onetsoc_code) REFERENCES occupation_data(onetsoc_code),
    FOREIGN KEY (task_id) REFERENCES task_statements(task_id),
    FOREIGN KEY (dwa_id) REFERENCES dwa_reference(dwa_id)
);

-- Task text per occupation; original_task_id is nullable and, when set,
-- references task_statements.
CREATE TABLE emerging_tasks (
    onetsoc_code     CHARACTER(10) NOT NULL,
    task             CHARACTER VARYING(1000) NOT NULL,
    category         CHARACTER VARYING(8) NOT NULL,
    original_task_id DECIMAL(8,0),
    date_updated     DATE NOT NULL,
    domain_source    CHARACTER VARYING(30) NOT NULL,
    FOREIGN KEY (onetsoc_code) REFERENCES occupation_data(onetsoc_code),
    FOREIGN KEY (original_task_id) REFERENCES task_statements(task_id)
);
|
||||
-- Occupation-to-occupation links; both codes reference occupation_data.
CREATE TABLE related_occupations (
    onetsoc_code         CHARACTER(10) NOT NULL,
    related_onetsoc_code CHARACTER(10) NOT NULL,
    relatedness_tier     CHARACTER VARYING(50) NOT NULL,
    related_index        DECIMAL(3,0) NOT NULL,
    FOREIGN KEY (onetsoc_code) REFERENCES occupation_data(onetsoc_code),
    FOREIGN KEY (related_onetsoc_code) REFERENCES occupation_data(onetsoc_code)
);

-- Commodity lookup keyed by commodity_code, carrying the class, family and
-- segment code/title for each commodity.
CREATE TABLE unspsc_reference (
    commodity_code  DECIMAL(8,0) NOT NULL,
    commodity_title CHARACTER VARYING(150) NOT NULL,
    class_code      DECIMAL(8,0) NOT NULL,
    class_title     CHARACTER VARYING(150) NOT NULL,
    family_code     DECIMAL(8,0) NOT NULL,
    family_title    CHARACTER VARYING(150) NOT NULL,
    segment_code    DECIMAL(8,0) NOT NULL,
    segment_title   CHARACTER VARYING(150) NOT NULL,
    PRIMARY KEY (commodity_code)
);

-- Alternate titles per occupation; short_title is nullable.
CREATE TABLE alternate_titles (
    onetsoc_code    CHARACTER(10) NOT NULL,
    alternate_title CHARACTER VARYING(250) NOT NULL,
    short_title     CHARACTER VARYING(150),
    sources         CHARACTER VARYING(50) NOT NULL,
    FOREIGN KEY (onetsoc_code) REFERENCES occupation_data(onetsoc_code)
);

-- Reported job titles per occupation, with a one-character flag column.
CREATE TABLE sample_of_reported_titles (
    onetsoc_code          CHARACTER(10) NOT NULL,
    reported_job_title    CHARACTER VARYING(150) NOT NULL,
    shown_in_my_next_move CHARACTER(1) NOT NULL,
    FOREIGN KEY (onetsoc_code) REFERENCES occupation_data(onetsoc_code)
);

-- Technology examples per occupation, classified by commodity_code, with
-- two one-character flag columns.
CREATE TABLE technology_skills (
    onetsoc_code   CHARACTER(10) NOT NULL,
    example        CHARACTER VARYING(150) NOT NULL,
    commodity_code DECIMAL(8,0) NOT NULL,
    hot_technology CHARACTER(1) NOT NULL,
    in_demand      CHARACTER(1) NOT NULL,
    FOREIGN KEY (onetsoc_code) REFERENCES occupation_data(onetsoc_code),
    FOREIGN KEY (commodity_code) REFERENCES unspsc_reference(commodity_code)
);

-- Tool examples per occupation, classified by commodity_code.
CREATE TABLE tools_used (
    onetsoc_code   CHARACTER(10) NOT NULL,
    example        CHARACTER VARYING(150) NOT NULL,
    commodity_code DECIMAL(8,0) NOT NULL,
    FOREIGN KEY (onetsoc_code) REFERENCES occupation_data(onetsoc_code),
    FOREIGN KEY (commodity_code) REFERENCES unspsc_reference(commodity_code)
);
|
||||
-- Element-to-element crosswalks. Both columns of each table reference
-- content_model_reference(element_id); no primary keys are declared.

CREATE TABLE abilities_to_work_activities (
    abilities_element_id       CHARACTER VARYING(20) NOT NULL,
    work_activities_element_id CHARACTER VARYING(20) NOT NULL,
    FOREIGN KEY (abilities_element_id) REFERENCES content_model_reference(element_id),
    FOREIGN KEY (work_activities_element_id) REFERENCES content_model_reference(element_id)
);

CREATE TABLE abilities_to_work_context (
    abilities_element_id    CHARACTER VARYING(20) NOT NULL,
    work_context_element_id CHARACTER VARYING(20) NOT NULL,
    FOREIGN KEY (abilities_element_id) REFERENCES content_model_reference(element_id),
    FOREIGN KEY (work_context_element_id) REFERENCES content_model_reference(element_id)
);

CREATE TABLE skills_to_work_activities (
    skills_element_id          CHARACTER VARYING(20) NOT NULL,
    work_activities_element_id CHARACTER VARYING(20) NOT NULL,
    FOREIGN KEY (skills_element_id) REFERENCES content_model_reference(element_id),
    FOREIGN KEY (work_activities_element_id) REFERENCES content_model_reference(element_id)
);

CREATE TABLE skills_to_work_context (
    skills_element_id       CHARACTER VARYING(20) NOT NULL,
    work_context_element_id CHARACTER VARYING(20) NOT NULL,
    FOREIGN KEY (skills_element_id) REFERENCES content_model_reference(element_id),
    FOREIGN KEY (work_context_element_id) REFERENCES content_model_reference(element_id)
);

-- Keywords attached to an element, each tagged with a keyword_type.
CREATE TABLE riasec_keywords (
    element_id   CHARACTER VARYING(20) NOT NULL,
    keyword      CHARACTER VARYING(150) NOT NULL,
    keyword_type CHARACTER VARYING(20) NOT NULL,
    FOREIGN KEY (element_id) REFERENCES content_model_reference(element_id)
);

-- Crosswalk between two element ids, both in content_model_reference.
CREATE TABLE basic_interests_to_riasec (
    basic_interests_element_id CHARACTER VARYING(20) NOT NULL,
    riasec_element_id          CHARACTER VARYING(20) NOT NULL,
    FOREIGN KEY (basic_interests_element_id) REFERENCES content_model_reference(element_id),
    FOREIGN KEY (riasec_element_id) REFERENCES content_model_reference(element_id)
);

-- Activity text attached to an element, tagged with an interest_type.
CREATE TABLE interests_illus_activities (
    element_id    CHARACTER VARYING(20) NOT NULL,
    interest_type CHARACTER VARYING(20) NOT NULL,
    activity      CHARACTER VARYING(150) NOT NULL,
    FOREIGN KEY (element_id) REFERENCES content_model_reference(element_id)
);

-- Occupations attached to an element, tagged with an interest_type.
CREATE TABLE interests_illus_occupations (
    element_id    CHARACTER VARYING(20) NOT NULL,
    interest_type CHARACTER VARYING(20) NOT NULL,
    onetsoc_code  CHARACTER(10) NOT NULL,
    FOREIGN KEY (element_id) REFERENCES content_model_reference(element_id),
    FOREIGN KEY (onetsoc_code) REFERENCES occupation_data(onetsoc_code)
);
|
||||
-- NOTE(review): sqlite_stat1 looks like SQLite's own statistics table, which
-- is normally created by the ANALYZE command rather than by hand. Table names
-- starting with "sqlite_" are reserved, so this statement is expected to fail
-- if the dump is replayed as a build script -- confirm, and run ANALYZE
-- instead if statistics are needed.
CREATE TABLE sqlite_stat1(tbl,idx,stat);
|
21699
archive/tasks_estimateable.csv
Normal file
21699
archive/tasks_estimateable.csv
Normal file
File diff suppressed because it is too large
Load diff
21699
archive/tasks_with_estimates.csv
Normal file
21699
archive/tasks_with_estimates.csv
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue