{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 86,
   "id": "beace815-b5ae-44a4-a81c-a7f82cb66296",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[2K\u001b[2mResolved \u001b[1m118 packages\u001b[0m \u001b[2min 386ms\u001b[0m\u001b[0m \u001b[0m\n",
      "\u001b[2K\u001b[2mPrepared \u001b[1m2 packages\u001b[0m \u001b[2min 124ms\u001b[0m\u001b[0m \n",
      "\u001b[2K\u001b[2mInstalled \u001b[1m2 packages\u001b[0m \u001b[2min 5ms\u001b[0m\u001b[0m \u001b[0m\n",
      " \u001b[32m+\u001b[39m \u001b[1met-xmlfile\u001b[0m\u001b[2m==2.0.0\u001b[0m\n",
      " \u001b[32m+\u001b[39m \u001b[1mopenpyxl\u001b[0m\u001b[2m==3.1.5\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "!uv add pandas requests openai dotenv openpyxl"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "941d511f-ad72-4306-bbab-52127583e513",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import dotenv\n",
    "import openai\n",
    "import sqlite3\n",
    "import pandas as pd\n",
    "\n",
    "dotenv.load_dotenv() # Copy .env.example to .env and fill in the blanks\n",
    "\n",
    "oai_token = os.getenv(\"OPENAI_API_KEY\")\n",
    "\n",
    "oai = openai.OpenAI(api_key=oai_token)\n",
    "\n",
    "# sqlite3.connect() creates a new, empty database file when the path does not exist,\n",
    "# so check for the file first and fail with a pointer to the setup script instead.\n",
    "onet_db_path = \"onet.database\" # Run ./create_onet_database.sh to create it\n",
    "if not os.path.isfile(onet_db_path):\n",
    "    raise FileNotFoundError(\n",
    "        f\"{onet_db_path} not found; run ./create_onet_database.sh to create it\"\n",
    "    )\n",
    "onet = sqlite3.connect(onet_db_path)\n",
    "\n",
    "# This dataset comes from https://epoch.ai/gradient-updates/consequences-of-automating-remote-work\n",
    "# It contains labels for whether an O*NET task can be done remotely or not (labeled by GPT-4o)\n",
    "# You can download it here: https://drive.google.com/file/d/1GrHhuYIgaCCgo99dZ_40BWraz-fzo76r/view?usp=sharing\n",
    "df_remote_status = pd.read_csv(\"epoch_task_data.csv\")\n",
    "\n",
    "# BLS OEWS: https://www.bls.gov/oes/special-requests/oesm23nat.zip\n",
    "df_oesm = pd.read_excel(\"oesm23national.xlsx\")\n",
    "\n",
    "# Run uv run enrich_task_ratings.py to get this file (trs = Task RatingS)\n",
    "df_enriched_trs = pd.read_json(\"task_ratings_enriched.json\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "a5351f8b-c2ad-4d3e-af4a-992f539a6064",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Human-readable label for each of the seven frequency_category_N keys,\n",
    "# ordered from the rarest bucket (1) to the most frequent bucket (7).\n",
    "FREQUENCY_MAP = {\n",
    "    'frequency_category_1': \"Yearly or less\",\n",
    "    'frequency_category_2': \"More than yearly\",\n",
    "    'frequency_category_3': \"More than monthly\",\n",
    "    'frequency_category_4': \"More than weekly\",\n",
    "    'frequency_category_5': \"Daily\",\n",
    "    'frequency_category_6': \"Several times daily\",\n",
    "    'frequency_category_7': \"Hourly or more\"\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "8b2ab22a-afab-41f9-81a3-48eab261b568",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Instruction text asking for the duration of one instance of a job task, answered as a\n",
    "# \"[duration A] - [duration B]\" range built only from the duration labels listed below.\n",
    "# NOTE(review): the label spellings are kept verbatim (including \"more than 3 year\") in case\n",
    "# other code matches on them - confirm before editing the wording.\n",
    "background_prompt = '''\n",
    "Estimate the typical duration to complete *one instance* of the following job task from the moment a person starts to work on it to the last moment the person will need to keep it in mind\n",
    "\n",
    "Take into account that there might be delays between the steps to complete the task, which would lengthen the estimate.\n",
    "\n",
    "Output a range with the format [duration A] - [duration B] where [duration A] and [duration B] correspond to one of the durations below:\n",
    "- less than 30 minutes\n",
    "- 30 minutes\n",
    "- 1 hour\n",
    "- 4 hours\n",
    "- 8 hours\n",
    "- 16 hours\n",
    "- 3 days\n",
    "- 1 week\n",
    "- 3 weeks\n",
    "- 6 weeks\n",
    "- 3 months\n",
    "- 6 months\n",
    "- 1 year\n",
    "- 3 years\n",
    "- more than 3 year\n",
    "\n",
    "**Do not output anything besides the range**\n",
    "'''"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "d2e4a855-f327-4b3d-ad0b-ed997e720639",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n", " | onetsoc_code | \n", "task_id | \n", "task | \n", "occupation_title | \n", "occupation_description | \n", "frequency_category_1 | \n", "frequency_category_2 | \n", "frequency_category_3 | \n", "frequency_category_4 | \n", "frequency_category_5 | \n", "frequency_category_6 | \n", "frequency_category_7 | \n", "importance_average | \n", "relevance_average | \n", "OCC_CODE | \n", "TOT_EMP | \n", "H_MEAN | \n", "A_MEAN | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "11-1011.00 | \n", "8823 | \n", "Direct or coordinate an organization's financi... | \n", "Chief Executives | \n", "Determine and formulate policies and provide o... | \n", "5.92 | \n", "15.98 | \n", "29.68 | \n", "21.18 | \n", "19.71 | \n", "4.91 | \n", "2.63 | \n", "4.52 | \n", "74.44 | \n", "11-1011 | \n", "211230.0 | \n", "124.47 | \n", "258900 | \n", "
1 | \n", "11-1011.00 | \n", "8824 | \n", "Confer with board members, organization offici... | \n", "Chief Executives | \n", "Determine and formulate policies and provide o... | \n", "1.42 | \n", "14.44 | \n", "27.31 | \n", "25.52 | \n", "26.88 | \n", "2.52 | \n", "1.90 | \n", "4.32 | \n", "81.71 | \n", "11-1011 | \n", "211230.0 | \n", "124.47 | \n", "258900 | \n", "
2 | \n", "11-1011.00 | \n", "8827 | \n", "Prepare budgets for approval, including those ... | \n", "Chief Executives | \n", "Determine and formulate policies and provide o... | \n", "15.50 | \n", "38.21 | \n", "32.73 | \n", "5.15 | \n", "5.25 | \n", "0.19 | \n", "2.98 | \n", "4.30 | \n", "93.41 | \n", "11-1011 | \n", "211230.0 | \n", "124.47 | \n", "258900 | \n", "
3 | \n", "11-1011.00 | \n", "8826 | \n", "Direct, plan, or implement policies, objective... | \n", "Chief Executives | \n", "Determine and formulate policies and provide o... | \n", "3.03 | \n", "17.33 | \n", "20.30 | \n", "18.10 | \n", "33.16 | \n", "2.01 | \n", "6.07 | \n", "4.24 | \n", "97.79 | \n", "11-1011 | \n", "211230.0 | \n", "124.47 | \n", "258900 | \n", "
4 | \n", "11-1011.00 | \n", "8834 | \n", "Prepare or present reports concerning activiti... | \n", "Chief Executives | \n", "Determine and formulate policies and provide o... | \n", "1.98 | \n", "14.06 | \n", "42.60 | \n", "21.24 | \n", "13.18 | \n", "6.24 | \n", "0.70 | \n", "4.17 | \n", "92.92 | \n", "11-1011 | \n", "211230.0 | \n", "124.47 | \n", "258900 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
17634 | \n", "53-7121.00 | \n", "12807 | \n", "Unload cars containing liquids by connecting h... | \n", "Tank Car, Truck, and Ship Loaders | \n", "Load and unload chemicals and bulk solids, suc... | \n", "6.05 | \n", "29.21 | \n", "6.88 | \n", "13.95 | \n", "27.65 | \n", "7.93 | \n", "8.34 | \n", "4.08 | \n", "64.04 | \n", "53-7121 | \n", "11400.0 | \n", "29.1 | \n", "60530 | \n", "
17635 | \n", "53-7121.00 | \n", "12804 | \n", "Clean interiors of tank cars or tank trucks, u... | \n", "Tank Car, Truck, and Ship Loaders | \n", "Load and unload chemicals and bulk solids, suc... | \n", "1.47 | \n", "6.33 | \n", "21.70 | \n", "25.69 | \n", "32.35 | \n", "12.47 | \n", "0.00 | \n", "4.02 | \n", "44.33 | \n", "53-7121 | \n", "11400.0 | \n", "29.1 | \n", "60530 | \n", "
17636 | \n", "53-7121.00 | \n", "12803 | \n", "Lower gauge rods into tanks or read meters to ... | \n", "Tank Car, Truck, and Ship Loaders | \n", "Load and unload chemicals and bulk solids, suc... | \n", "4.52 | \n", "1.76 | \n", "4.65 | \n", "17.81 | \n", "37.42 | \n", "23.31 | \n", "10.55 | \n", "3.88 | \n", "65.00 | \n", "53-7121 | \n", "11400.0 | \n", "29.1 | \n", "60530 | \n", "
17637 | \n", "53-7121.00 | \n", "12805 | \n", "Operate conveyors and equipment to transfer gr... | \n", "Tank Car, Truck, and Ship Loaders | \n", "Load and unload chemicals and bulk solids, suc... | \n", "6.97 | \n", "12.00 | \n", "2.52 | \n", "5.90 | \n", "35.48 | \n", "22.08 | \n", "15.05 | \n", "3.87 | \n", "47.90 | \n", "53-7121 | \n", "11400.0 | \n", "29.1 | \n", "60530 | \n", "
17638 | \n", "53-7121.00 | \n", "12810 | \n", "Perform general warehouse activities, such as ... | \n", "Tank Car, Truck, and Ship Loaders | \n", "Load and unload chemicals and bulk solids, suc... | \n", "5.91 | \n", "10.85 | \n", "6.46 | \n", "14.46 | \n", "34.14 | \n", "16.39 | \n", "11.78 | \n", "3.53 | \n", "47.84 | \n", "53-7121 | \n", "11400.0 | \n", "29.1 | \n", "60530 | \n", "
17639 rows × 18 columns
\n", "