{ "cells": [ { "cell_type": "code", "execution_count": 86, "id": "beace815-b5ae-44a4-a81c-a7f82cb66296", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[2K\u001b[2mResolved \u001b[1m118 packages\u001b[0m \u001b[2min 386ms\u001b[0m\u001b[0m \u001b[0m\n", "\u001b[2K\u001b[2mPrepared \u001b[1m2 packages\u001b[0m \u001b[2min 124ms\u001b[0m\u001b[0m \n", "\u001b[2K\u001b[2mInstalled \u001b[1m2 packages\u001b[0m \u001b[2min 5ms\u001b[0m\u001b[0m \u001b[0m\n", " \u001b[32m+\u001b[39m \u001b[1met-xmlfile\u001b[0m\u001b[2m==2.0.0\u001b[0m\n", " \u001b[32m+\u001b[39m \u001b[1mopenpyxl\u001b[0m\u001b[2m==3.1.5\u001b[0m\n" ] } ], "source": [ "!uv add pandas requests openai dotenv openpyxl" ] }, { "cell_type": "code", "execution_count": 9, "id": "941d511f-ad72-4306-bbab-52127583e513", "metadata": {}, "outputs": [], "source": [ "import os\n", "import dotenv\n", "import openai\n", "import sqlite3\n", "import pandas as pd\n", "\n", "dotenv.load_dotenv() # Copy .env.example to .env and fill in the blanks\n", "\n", "oai_token = os.getenv(\"OPENAI_API_KEY\")\n", "\n", "oai = openai.OpenAI(api_key=oai_token)\n", "onet = sqlite3.connect(\"onet.database\") # Run ./create_onet_database.sh to create it\n", "# This dataset comes from https://epoch.ai/gradient-updates/consequences-of-automating-remote-work\n", "# It contains labels for whether an O*NET task can be done remotely or not (labeled by GPT-4o)\n", "# You can download it here: https://drive.google.com/file/d/1GrHhuYIgaCCgo99dZ_40BWraz-fzo76r/view?usp=sharing\n", "df_remote_status = pd.read_csv(\"epoch_task_data.csv\")\n", "\n", "# BLS OEWS: https://www.bls.gov/oes/special-requests/oesm23nat.zip\n", "df_oesm = pd.read_excel(\"oesm23national.xlsx\")\n", "\n", "# Run uv run enrich_task_ratings.py to get this file (trs = Task RatingS)\n", "df_enriched_trs = pd.read_json(\"task_ratings_enriched.json\")" ] }, { "cell_type": "code", "execution_count": 3, "id": 
"a5351f8b-c2ad-4d3e-af4a-992f539a6064", "metadata": {}, "outputs": [], "source": [
 "FREQUENCY_MAP = {\n",
 " 'frequency_category_1': \"Yearly or less\",\n",
 " 'frequency_category_2': \"More than yearly\",\n",
 " 'frequency_category_3': \"More than monthly\",\n",
 " 'frequency_category_4': \"More than weekly\",\n",
 " 'frequency_category_5': \"Daily\",\n",
 " 'frequency_category_6': \"Several times daily\",\n",
 " 'frequency_category_7': \"Hourly or more\"\n",
 "}"
] }, { "cell_type": "code", "execution_count": 4, "id": "8b2ab22a-afab-41f9-81a3-48eab261b568", "metadata": {}, "outputs": [], "source": [
 "background_prompt = '''\n",
 "Estimate the typical duration to complete *one instance* of the following job task from the moment a person starts to work on it to the last moment the person will need to keep it in mind\n",
 "\n",
 "Take into account that there might be delays between the steps to complete the task, which would lengthen the estimate.\n",
 "\n",
 "Output a range with the format [duration A] - [duration B] where [duration A] and [duration B] correspond to one of the durations below:\n",
 "- less than 30 minutes\n",
 "- 30 minutes\n",
 "- 1 hour\n",
 "- 4 hours\n",
 "- 8 hours\n",
 "- 16 hours\n",
 "- 3 days\n",
 "- 1 week\n",
 "- 3 weeks\n",
 "- 6 weeks\n",
 "- 3 months\n",
 "- 6 months\n",
 "- 1 year\n",
 "- 3 years\n",
 "- more than 3 year\n",
 "\n",
 "**Do not output anything besides the range**\n",
 "'''"
] }, { "cell_type": "code", "execution_count": 11, "id": "d2e4a855-f327-4b3d-ad0b-ed997e720639", "metadata": {}, "outputs": [], "source": [
 "# Keep only the detailed-occupation rows of the OEWS table, with the employment and wage columns.\n",
 "df_oesm_detailed = df_oesm.loc[\n",
 "    df_oesm['O_GROUP'] == 'detailed',\n",
 "    ['OCC_CODE', 'TOT_EMP', 'H_MEAN', 'A_MEAN'],\n",
 "].copy()\n",
 "\n",
 "# The OEWS code is matched against the first 7 characters of the O*NET-SOC code.\n",
 "# NOTE: this adds the helper column to df_enriched_trs itself, so it remains visible in later cells.\n",
 "df_enriched_trs['occ_code_join'] = df_enriched_trs['onetsoc_code'].str[:7]\n",
 "\n",
 "# NOTE(review): the next cell rebuilds df_merged from df_enriched_trs, which discards this join -- confirm the intended order.\n",
 "df_merged = (\n",
 "    df_enriched_trs\n",
 "    .merge(df_oesm_detailed, left_on='occ_code_join', right_on='OCC_CODE', how='left')\n",
 "    .drop(columns=['occ_code_join'])\n",
 ")"
] }, { "cell_type": "code", 
"execution_count": 12, "id": "9be7acb5-2374-4f61-bba3-13b0077c0bd2", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Task: Develop or recommend network security measures, such as firewalls, network security audits, or automated security probes.\n", "Occupation Description: Design and implement computer and information networks, such as local area networks (LAN), wide area networks (WAN), intranets, extranets, and other data communications networks. Perform network modeling, analysis, and planning, including analysis of capacity needs for network infrastructures. May also design network and computer security measures. May research and recommend network and data communications hardware and software.\n", "Occupation Title: Computer Network Architects\n" ] }, { "data": { "text/plain": [ "onetsoc_code 15-1241.00\n", "task_id 18971\n", "task Develop or recommend network security measures...\n", "occupation_title Computer Network Architects\n", "occupation_description Design and implement computer and information ...\n", "Yearly or less 0.0\n", "More than yearly 30.0\n", "More than monthly 15.0\n", "More than weekly 20.0\n", "Daily 15.0\n", "Several times daily 15.0\n", "Hourly or more 5.0\n", "importance_average 4.35\n", "relevance_average 100.0\n", "occ_code_join 15-1241\n", "remote remote\n", "Name: 45200, dtype: object" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Attach the remote labels, give the frequency columns their readable names,\n",
 "# and keep only rows labelled remote with importance_average >= 3.\n",
 "# NOTE(review): the join key is the occupation code only, so every task row is paired with each\n",
 "# label row of its occupation; the repeated identical rows in the following cell's stored output\n",
 "# may come from this -- confirm whether the join should also match on the task.\n",
 "df_merged = (\n",
 "    pd.merge(\n",
 "        left=df_enriched_trs,\n",
 "        right=df_remote_status[['O*NET-SOC Code', 'Remote']],\n",
 "        how='left',\n",
 "        left_on='onetsoc_code',\n",
 "        right_on='O*NET-SOC Code',\n",
 "    )\n",
 "    .drop(columns=['O*NET-SOC Code'])\n",
 "    .rename(columns={'Remote': 'remote'})\n",
 "    .rename(columns=FREQUENCY_MAP)\n",
 "    .query('remote == \"remote\" and importance_average >= 3')\n",
 ")\n",
 "\n",
 "# Show one example row (position 30000 of the filtered frame) with its long text fields in full.\n",
 "row = df_merged.iloc[30000]\n",
 "print('Task: ', row['task'])\n",
 "print('Occupation Description: ', row['occupation_description'])\n",
 "print('Occupation Title: ', row['occupation_title'])\n",
 "\n",
 "row"
] }, { "cell_type": "code", "execution_count": 13, "id": "9e5ea89f-2c18-459d-851d-dacb379f4a2e", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | onetsoc_code | \n", "task_id | \n", "task | \n", "occupation_title | \n", "occupation_description | \n", "Yearly or less | \n", "More than yearly | \n", "More than monthly | \n", "More than weekly | \n", "Daily | \n", "Several times daily | \n", "Hourly or more | \n", "importance_average | \n", "relevance_average | \n", "remote | \n", "OCC_CODE | \n", "TOT_EMP | \n", "H_MEAN | \n", "A_MEAN | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "11-1011.00 | \n", "8823 | \n", "Direct or coordinate an organization's financi... | \n", "Chief Executives | \n", "Determine and formulate policies and provide o... | \n", "5.92 | \n", "15.98 | \n", "29.68 | \n", "21.18 | \n", "19.71 | \n", "4.91 | \n", "2.63 | \n", "4.52 | \n", "74.44 | \n", "remote | \n", "11-1011 | \n", "211230.0 | \n", "124.47 | \n", "258900 | \n", "
1 | \n", "11-1011.00 | \n", "8823 | \n", "Direct or coordinate an organization's financi... | \n", "Chief Executives | \n", "Determine and formulate policies and provide o... | \n", "5.92 | \n", "15.98 | \n", "29.68 | \n", "21.18 | \n", "19.71 | \n", "4.91 | \n", "2.63 | \n", "4.52 | \n", "74.44 | \n", "remote | \n", "11-1011 | \n", "211230.0 | \n", "124.47 | \n", "258900 | \n", "
2 | \n", "11-1011.00 | \n", "8823 | \n", "Direct or coordinate an organization's financi... | \n", "Chief Executives | \n", "Determine and formulate policies and provide o... | \n", "5.92 | \n", "15.98 | \n", "29.68 | \n", "21.18 | \n", "19.71 | \n", "4.91 | \n", "2.63 | \n", "4.52 | \n", "74.44 | \n", "remote | \n", "11-1011 | \n", "211230.0 | \n", "124.47 | \n", "258900 | \n", "
3 | \n", "11-1011.00 | \n", "8823 | \n", "Direct or coordinate an organization's financi... | \n", "Chief Executives | \n", "Determine and formulate policies and provide o... | \n", "5.92 | \n", "15.98 | \n", "29.68 | \n", "21.18 | \n", "19.71 | \n", "4.91 | \n", "2.63 | \n", "4.52 | \n", "74.44 | \n", "remote | \n", "11-1011 | \n", "211230.0 | \n", "124.47 | \n", "258900 | \n", "
4 | \n", "11-1011.00 | \n", "8823 | \n", "Direct or coordinate an organization's financi... | \n", "Chief Executives | \n", "Determine and formulate policies and provide o... | \n", "5.92 | \n", "15.98 | \n", "29.68 | \n", "21.18 | \n", "19.71 | \n", "4.91 | \n", "2.63 | \n", "4.52 | \n", "74.44 | \n", "remote | \n", "11-1011 | \n", "211230.0 | \n", "124.47 | \n", "258900 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
127653 | \n", "53-7121.00 | \n", "12807 | \n", "Unload cars containing liquids by connecting h... | \n", "Tank Car, Truck, and Ship Loaders | \n", "Load and unload chemicals and bulk solids, suc... | \n", "6.05 | \n", "29.21 | \n", "6.88 | \n", "13.95 | \n", "27.65 | \n", "7.93 | \n", "8.34 | \n", "4.08 | \n", "64.04 | \n", "remote | \n", "53-7121 | \n", "11400.0 | \n", "29.1 | \n", "60530 | \n", "
127654 | \n", "53-7121.00 | \n", "12804 | \n", "Clean interiors of tank cars or tank trucks, u... | \n", "Tank Car, Truck, and Ship Loaders | \n", "Load and unload chemicals and bulk solids, suc... | \n", "1.47 | \n", "6.33 | \n", "21.70 | \n", "25.69 | \n", "32.35 | \n", "12.47 | \n", "0.00 | \n", "4.02 | \n", "44.33 | \n", "remote | \n", "53-7121 | \n", "11400.0 | \n", "29.1 | \n", "60530 | \n", "
127655 | \n", "53-7121.00 | \n", "12803 | \n", "Lower gauge rods into tanks or read meters to ... | \n", "Tank Car, Truck, and Ship Loaders | \n", "Load and unload chemicals and bulk solids, suc... | \n", "4.52 | \n", "1.76 | \n", "4.65 | \n", "17.81 | \n", "37.42 | \n", "23.31 | \n", "10.55 | \n", "3.88 | \n", "65.00 | \n", "remote | \n", "53-7121 | \n", "11400.0 | \n", "29.1 | \n", "60530 | \n", "
127656 | \n", "53-7121.00 | \n", "12805 | \n", "Operate conveyors and equipment to transfer gr... | \n", "Tank Car, Truck, and Ship Loaders | \n", "Load and unload chemicals and bulk solids, suc... | \n", "6.97 | \n", "12.00 | \n", "2.52 | \n", "5.90 | \n", "35.48 | \n", "22.08 | \n", "15.05 | \n", "3.87 | \n", "47.90 | \n", "remote | \n", "53-7121 | \n", "11400.0 | \n", "29.1 | \n", "60530 | \n", "
127657 | \n", "53-7121.00 | \n", "12810 | \n", "Perform general warehouse activities, such as ... | \n", "Tank Car, Truck, and Ship Loaders | \n", "Load and unload chemicals and bulk solids, suc... | \n", "5.91 | \n", "10.85 | \n", "6.46 | \n", "14.46 | \n", "34.14 | \n", "16.39 | \n", "11.78 | \n", "3.53 | \n", "47.84 | \n", "remote | \n", "53-7121 | \n", "11400.0 | \n", "29.1 | \n", "60530 | \n", "
127658 rows × 19 columns
\n", "