Tyler Widdison
  • About
  • Blog

Select Health providers

Maps
Data gathering
Informational
Data gathering, wrangling, visualizing with health providers
Published

June 22, 2023

SelectHealth offers medical and dental insurance plans in Utah, Idaho, and Nevada. This post’s goal is to build a interactive tool which can identify providers in a map using their machine readable data!

Data is gathered from https://selecthealth.org/machine-readable-data. Only gathering the ‘Providers’ data for this post. https://ebu.intermountainhealthcare.org/shprovider/exchange.providers.json.


Code
library(dplyr)
library(tidyr)
library(readr)
library(jsonlite)
library(DT)
library(shiny)
library(shinyWidgets)
library(purrr)
library(leaflet)

# Load data
#provider <- jsonlite::fromJSON("https://ebu.intermountainhealthcare.org/shprovider/exchange.providers.json")

load(here::here(file = 'posts/select health/provider.rda'))

# Wrangle data
data <- provider %>% 
    dplyr::mutate(addresses = purrr::map(addresses, as.data.frame),
                  facility_type = purrr::map_chr(facility_type, ~ ifelse(length(.x) > 0, purrr::pluck(.x, 1), NA)),
                  plans = purrr::map(plans, as.data.frame),
                  first_name = name$first,
                  last_name = name$last
    ) %>% 
    tidyr::unnest(addresses, keep_empty = T) %>% 
    tidyr::unnest(languages, keep_empty = T) %>% 
    tidyr::unnest(specialty, keep_empty = T) %>% 
    tidyr::unnest(plans, keep_empty = T) %>% 
    tidyr::unnest(years, keep_empty = T) %>% 
    dplyr::mutate(addresses = stringr::str_replace_all(address, "&", ","),
                  addy = paste0(addresses, ", ", city, ", ", state)
    )

After reading the data the next step is to gather the longitude and latitude for map usage. I use the ggmap package. (geocoded_data was ran previously and takes a while to run)

**NOTE:** ggmap requires some work in Google and registering your api. I will not go over that portion of this process. I originally attempted to use tidygeocoder, but found there is a HIPAA situation which prevented me from getting the correct longitude and latitude for specific facilities. I found using ggmap did not give me this error.


Code
# Get geocoordinates
#addys <- data %>% 
#  dplyr::distinct(addy)

# Obtain coordinates using ggmap
#geocoded_data <- addys %>%
#  ggmap::mutate_geocode(addy)
#readr::write_csv(geocoded_data, "geocoded_data.csv")
load(here::here(file = 'posts/select health/geocoded_data.rda'))

Then some additional data wrangling after obtaining all geocodes.


Code
# Join the coord with the providers data and wrangle furthur
df <- data %>% 
  dplyr::left_join(geocoded_data) %>% 
  dplyr::filter(!grepl("^\\(", addy)) %>% 
  dplyr::distinct(facility_name, facility_type, type, addy, lon, lat, group_name, languages, gender, first_name, last_name, specialty, network_tier)

group_data <- df %>% 
  dplyr::filter(type == 'GROUP') %>% 
  janitor::remove_empty('cols') %>% 
  dplyr::mutate(notes = paste0(group_name)) %>% 
  dplyr::distinct(type, addy, lon, lat, group_name, notes)

indiv_data <- df %>% 
  dplyr::filter(type == 'INDIVIDUAL') %>% 
  janitor::remove_empty('cols') %>% 
  dplyr::mutate(name = paste0(first_name, ' ', last_name),
                notes = paste0(gender, " - ", name, " - ", languages, " - ", specialty)) %>% 
  dplyr::distinct(type, addy, lon, lat, languages, gender, name, specialty, notes)

facility_data <- df %>% 
  dplyr::filter(type == 'FACILITY') %>% 
  janitor::remove_empty('cols') %>% 
  dplyr::mutate(notes = paste0(facility_name)) %>% 
  dplyr::distinct(facility_name, facility_type, type, addy, lon, lat, notes)

All types

Now lets do some mapping!


Code
# map!
leaflet() %>%
  leaflet::setView(lng = -111.349309, lat = 41.648323, zoom = 4) %>% 
  leaflet::addTiles() %>% 
  leaflet::addCircleMarkers(popup = group_data$notes, clusterOptions = leaflet::markerClusterOptions(), lng = group_data$lon, lat = group_data$lat, group = "Group", color = "red") %>%
  leaflet::addCircleMarkers(popup = facility_data$notes, clusterOptions = leaflet::markerClusterOptions(), lng = facility_data$lon, lat = facility_data$lat, group = "Facility", color = "#F1CC6E") %>%
  leaflet::addCircleMarkers(popup = indiv_data$notes, clusterOptions = leaflet::markerClusterOptions(), lng = indiv_data$lon, lat = indiv_data$lat, group = "Individual", color = "#66B0B1") %>%
  leaflet::addLegend(values = 'Group', group = "Group", position = "bottomleft", labels = "Group", colors = "red") %>%
  leaflet::addLegend(values = 'Facility', group = "Facility", position = "bottomleft", labels = "Facility", colors = "#F1CC6E") %>%
  leaflet::addLegend(values = 'Individual', group = "Individual", position = "bottomleft", labels = "Individual", colors = "#66B0B1") %>%
  leaflet::addLayersControl(overlayGroups = c("Group", "Facility", 'Individual'),
                   options = leaflet::layersControlOptions(collapsed = FALSE)) 

This is a relatively simple example of how to gather and visualize public data.


Individual providers map exploration

Additionally, using shiny we can go further with individual providers, identifying specific providers locations by gender, language and specialty.


Shiny Code
# load libraries ----
library(shiny)
library(shinyWidgets)
library(dplyr)
library(jsonlite)
library(DT)
library(leaflet)

# functions for map ----
viz_map <- function(df, threshold = NULL) {

  leaflet::leaflet() %>%
    leaflet::setView(lng = -111.349309, lat = 41.648323, zoom = 4) %>%
    leaflet::addTiles() %>%
    leaflet::addCircleMarkers(popup = df$notes,
                              clusterOptions = leaflet::markerClusterOptions(),
                              lng = df$lon,
                              lat = df$lat,
                              color = "red")
}

# load data ----
load(file = 'provider.rda')
load(file = 'geocoded_data.rda')

# data prep ----
data <- provider %>%
  dplyr::filter(type == 'INDIVIDUAL') %>%
  dplyr::mutate(addresses = purrr::map(addresses, as.data.frame),
                name = paste0(name$first, ' ', name$last)
                ) %>%
  tidyr::unnest(addresses, keep_empty = T) %>%
  tidyr::unnest(languages, keep_empty = T) %>%
  tidyr::unnest(specialty, keep_empty = T) %>%
  dplyr::mutate(addresses = stringr::str_replace_all(address, "&", ","),
                addy = paste0(addresses, ", ", city, ", ", state),
                notes = paste0(gender, " - ", name, " - ", languages, " - ", specialty)
                ) %>%
  dplyr::left_join(geocoded_data) %>%
  dplyr::distinct(type, addy, lon, lat, languages, gender, name, specialty, notes)

# shiny application ----
ui <- fluidPage(

  titlePanel("Select Health Individual providers map"),
  # Inputs
  column(2,
         fluidRow(selectInput(inputId = "gender", label = "Gender", choices = c('Female', 'Male'), selected = 'Female')),
         fluidRow(pickerInput(inputId = 'language', label = "Language", choices = NULL, selected = NULL, multiple = T, options = list(`actions-box` = TRUE))),
         fluidRow(pickerInput(inputId = 'specialty', label = "Specialty", choices = NULL, selected = NULL, multiple = T, options = list(`actions-box` = TRUE)))
         ),
  # Plot/Table
  mainPanel(
    column(12, leafletOutput('map')),
    column(12, DT::dataTableOutput('table'))
    ))

server <- function(input, output, session) {

  current_language <- reactiveVal()
  observe({current_language(input$language)})

  current_specialty <- reactiveVal()
  observe({current_specialty(input$specialty)})

  observeEvent(input$gender,{
    updatePickerInput(session, 'language', selected = current_language(), choices = unique(sort(data$languages[data$gender == input$gender])))
    })

  observeEvent(input$language,{
    updatePickerInput(session, 'specialty', selected = current_specialty(), choices = unique(sort(data$specialty[data$gender == input$gender & data$languages %in% input$language])))
  })

  # Map
  output$map <- renderLeaflet({
    map <- data %>%
      filter(gender == input$gender & languages %in% input$language & specialty %in% input$specialty)
    viz_map(map)
    })

  # Table
  output$table <- DT::renderDataTable({
    DT::datatable(
      data %>%
        filter(gender == input$gender & languages %in% input$language & specialty %in% input$specialty) %>%
        distinct(name, addy, specialty, languages),
      colnames = c('Name', 'Address', 'Specialty', 'Language'),
      rownames = FALSE,
      extensions = 'Buttons',
      class = 'display nowrap compact cell-border',
      filter = 'top',
      options = list(
        scrollX = TRUE,
        scrollY = "300px",
        dom = 'tB',
        buttons = c('copy', 'csv'),
        paging = FALSE,
        fixedHeader = TRUE)
      )
    })
  }

shinyApp(ui, server)


Additional data

This is a great resource for additional provider data outside of the select health network:

https://github.com/bayesimpact/encompass/blob/a4f47e384ef4fe4dc43c30423a1713c2c93dc87f/data/healthcare_gov/Machine_Readable_URL_PUF.csv#L272