# Read in/download code for using National Vital Statistics System (NVSS)
# mortality data in R.
# Code created by Hannah Free at the National Institute for Occupational Safety
# and Health (NIOSH), United States Centers for Disease Control and Prevention.
# Acknowledgements: Stephen Bertke, NIOSH.
# FOR PROBLEMS OR QUESTIONS ABOUT THIS CODE, PLEASE EMAIL: noms@cdc.gov

# Basic Setup ------------------------------------------------------------------

# Set the working directory to the folder that holds your data by adding the
# path between the quotes. While the path is left empty the working directory
# is not changed (calling setwd() with no argument stops the script with a
# missing-argument error).
data_dir <- ""
if (nzchar(data_dir)) {
  setwd(data_dir)
}

library(tidyverse)

# The NVSS multiple mortality file for a given year can be found here:
# https://www.cdc.gov/nchs/data_access/vitalstatsonline.htm#Mortality_Multiple
# Note that prior to the 2021 data, the files are not text files and will
# require more alterations that are not covered here.
# File documentation for a given year can be found here:
# https://www.cdc.gov/nchs/nvss/mortality_public_use_data.htm

# The file will be read in as a fixed width file with the variables that are
# specified. If you want to edit the variables used, you will need to change
# the three positions to match the variables you want based on the NVSS file
# documentation on multiple cause record layout. The three positions are the
# record start position or "Tape location", the end position, and the variable
# name or "data item". Based on the documentation, the variable for sex is at
# tape location 69, so the three positions are 69 (start), 69 (end), and Sex
# (name of data item). If the item spans several locations, like the age recode
# to 12 options, the three positions are 79 (start), 80 (end), and
# Age_Recode_12 (name of data item). These are added to the list until you have
# specified all of the variables you need for the data frame you wish to
# create.
# Read in Data -----------------------------------------------------------------

# Timestamp taken just before the read; leave this line as is.
start <- Sys.time()

# Path to the NVSS multiple cause-of-death text file: add it between the quotes.
nvss_file <- ""

# Record layout, one row per variable: start position ("tape location"), end
# position, and the column name to use in the data frame. Rows are listed in
# the order the columns will appear in `df`, which is not file order. To add or
# drop a variable, add or delete its row.
#
# NOTE(review): Education_2003 is read from position 63 only. It was previously
# read as 63-64, which overlapped Education_flag (position 64). The NVSS record
# layout gives the 2003-revision education item a single character at 63;
# confirm against the documentation for your data year.
nvss_layout <- tibble::tribble(
  ~start_pos, ~end_pos, ~var_name,
  69,  69,  "Sex",
  79,  80,  "Age_Recode_12",
  63,  63,  "Education_2003",
  106, 106, "Injury_at_Work",
  489, 490, "RaceRecode_40",
  484, 486, "Hispanic_Origin",
  146, 149, "UNDERLYING_CAUSE_OF_DEATH",
  806, 809, "CensusOcc",
  810, 811, "Occ_26",
  812, 815, "CensusInd",
  816, 817, "Ind_23",
  20,  20,  "Resident_Status_US",
  64,  64,  "Education_flag",
  65,  66,  "Month_of_Death",
  70,  73,  "DetailAge",
  74,  74,  "Age_Substitution_Flag",
  75,  76,  "Age_Recode_52",
  77,  78,  "Age_Recode_27",
  81,  82,  "Infant_Age_Recode",
  83,  83,  "Place_of_Death_andStatus",
  84,  84,  "Marital_Status",
  85,  85,  "Day_of_Week_of_Death",
  102, 105, "Current_Data_Year",
  107, 107, "Manner_of_Death",
  108, 108, "Method_of_Disposition",
  109, 109, "Autopsy",
  144, 144, "Activity_Code",
  145, 145, "Place_of_Injury",
  150, 152, "Cause_Recode_358",
  154, 156, "Cause_Recode_113",
  157, 159, "Infant_Cause_Recode",
  160, 161, "Cause_Recode_39",
  341, 342, "Number_Record_Axis_Conditions",
  344, 348, "Condition_1RA",
  349, 353, "Condition_2RA",
  354, 358, "Condition_3RA",
  359, 363, "Condition_4RA",
  364, 368, "Condition_5RA",
  369, 373, "Condition_6RA",
  374, 378, "Condition_7RA",
  379, 383, "Condition_8RA",
  384, 388, "Condition_9RA",
  389, 393, "Condition_10RA",
  394, 398, "Condition_11RA",
  399, 403, "Condition_12RA",
  404, 408, "Condition_13RA",
  409, 413, "Condition_14RA",
  414, 418, "Condition_15RA",
  419, 423, "Condition_16RA",
  424, 428, "Condition_17RA",
  429, 433, "Condition_18RA",
  434, 438, "Condition_19RA",
  439, 443, "Condition_20RA",
  448, 448, "Race_Imputation_Flag"
)

# Read the fixed width file. Every field is read as character, so coded values
# are kept exactly as they appear in the file.
df <- readr::read_fwf(
  nvss_file,
  readr::fwf_positions(
    start = nvss_layout$start_pos,
    end = nvss_layout$end_pos,
    col_names = nvss_layout$var_name
  ),
  col_types = readr::cols(.default = readr::col_character())
)