Tyler B. Hampton: December 2018

Introduction

This tutorial uses the sf package to perform geoprocessing, and ggplot to visualize the data. Additionally, we practice downloading open-source data from the internet.

Adjust Settings, Load Packages

knitr::opts_chunk$set(echo = TRUE)
for(p in c("ggplot2","sf","dplyr","plyr","raster")){library(p,character.only = T)}
directory=".../Layers_Canada/CanadaPopulationDensity/"
theme_set(theme_bw()+
            theme(
              panel.background = element_blank(),axis.ticks = element_blank(),
              legend.background = element_blank(),panel.grid = element_line(colour = 0)
            ))

Download Open-Source data from open.canada.ca

setwd(directory)
if(!("CAdata.zip" %in% list.files())){
  #https://open.canada.ca/data/en/dataset/5be03a46-8504-40a7-a96c-af195bae0428
  download.file(url = "http://www12.statcan.gc.ca/census-recensement/2011/geo/bound-limit/files-fichiers/gecu000e11a_e.zip",
                dest=paste0(directory,"CAdata.zip"))
}
if(!("CAcensusdata" %in% list.files())){unzip(zipfile = "CAdata.zip",exdir = "CAcensusdata")}
#https://open.canada.ca/data/en/dataset/ece81c43-aa4e-41ef-86c2-3835eb5aa95c
download.file(url = "http://www12.statcan.gc.ca/census-recensement/2016/dp-pd/hlt-fst/pd-pl/Tables/CompFile.cfm?Lang=Eng&T=701&OFT=FULLCSV",dest=paste0(directory,"CAcensusdata/CensusData.csv"))
if(!("Borders.zip" %in% list.files())){
  #http://thematicmapping.org/downloads/world_borders.php
  download.file(url = "http://thematicmapping.org/downloads/TM_WORLD_BORDERS-0.3.zip",dest=paste0(directory,"Borders.zip"))
}
setwd(setwd(directory))
if(!("WorldBorders" %in% list.files())){unzip(zipfile = "Borders.zip",exdir = "WorldBorders")}

Inspect the Canadian Census data from 2016.

We see that population within census divisions is log-normally distributed. We can also confirm that the population density calculations within each census division match the population and area values. The population of Canada in 2016 was 35.2 million people.

census=read.csv(paste0(directory,"CAcensusdata/CensusData.csv"))
ggplot(data=census)+
  geom_histogram(aes(x=Population..2016))+
  scale_x_log10()

# Confirm that Population Density calculations match areas
ggplot(data=census)+
  geom_point(aes(y=Population..2016,x=Land.area.in.square.kilometres..2016*Population.density.per.square.kilometre..2016))+
  geom_abline(intercept=0,slope=1)

print(paste(round(sum(census$Population..2016,na.rm=TRUE)*10^-6,1),"million people"))

## [1] "35.2 million people"

Inspect GIS Data

proj="+proj=aea +lat_1=50 +lat_2=70 +lat_0=40 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs "

CAshp=st_read(paste0(directory,"CAcensusdata/gcd_000e11a_e.shp"))

## Reading layer `gcd_000e11a_e' from data source `...\Layers_Canada\CanadaPopulationDensity\CAcensusdata\gcd_000e11a_e.shp' using driver `ESRI Shapefile'
## Simple feature collection with 293 features and 5 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -141.0181 ymin: 41.71096 xmax: -52.61941 ymax: 83.1355
## epsg (SRID):    4269
## proj4string:    +proj=longlat +datum=NAD83 +no_defs

CAshp=st_transform(CAshp,proj)
CAshp=dplyr::left_join(CAshp,census,by=c("CDUID"="Geographic.code"))

ggplot()+
  geom_sf(data=CAshp)+
  ggtitle("Canada Census Divisions")

borders=st_read(paste0(directory,"WorldBorders/TM_WORLD_BORDERS-0.3.shp"))

## Reading layer `TM_WORLD_BORDERS-0.3' from data source `...\Layers_Canada\CanadaPopulationDensity\WorldBorders\TM_WORLD_BORDERS-0.3.shp' using driver `ESRI Shapefile'
## Simple feature collection with 246 features and 11 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -180 ymin: -90 xmax: 180 ymax: 83.6236
## epsg (SRID):    4326
## proj4string:    +proj=longlat +datum=WGS84 +no_defs

USA=borders[borders$NAME=="United States",c("NAME","geometry")]
USA=st_transform(USA,proj)
st_crs(USA)$units

## [1] "m"

USA_buf=st_buffer(USA,dist=160934) # 100 miles = 160934 meters

ggplot()+
  geom_sf(data=USA,fill="gray",col="transparent")+
  geom_sf(data=USA_buf,fill="transparent",col=2)+
  ggtitle("US Border and 100 mile buffer")

Inspect Population Density Data

We can see that the region within 100 miles of the US border is highly populated, but there are cities like Calgary, Edmonton, and Saskatoon that are north of this boundary.

ggplot()+
  geom_sf(data=CAshp,aes(fill=log10(Population.density.per.square.kilometre..2016)),col="transparent")+
  geom_sf(data=USA_buf,col=2,fill="transparent")+
  coord_sf(xlim = extent(CAshp)[1:2],ylim = extent(CAshp)[3:4])+
  ggtitle("Canadian Census Divisions and Population Density")+
  scale_fill_gradient(name="Pop_dens",low="darkblue",high="red",na.value="darkblue",
                      limits=c(-1.2,3.5),breaks=c(-1:3),labels=(10^c(-1:3)))

Use st_intersection to extract census tracts

We use st_intersection to intersect the buffered US borer polygon and Canadian census tracts. We assume population is evenly distributed within census divisions, allowing us to adjust the size of intersected tracts for their new clipped size. Inspecting their new calculated areas, we see that many tracts are completely within the 100 mile buffer, but others have a much lower area.
Our final calculation finds that 27.3 million people live within 100 miles of the US border, or 77.7% of the population.

intersect=st_intersection(CAshp,USA_buf)
plot=ggplot()+
  geom_sf(data=CAshp,fill="gray",col="transparent")+
  geom_sf(data=intersect,aes(fill=log10(Population.density.per.square.kilometre..2016)),col="transparent")+
  ggtitle("Population of Canada within 100 miles of the US border")+
  scale_fill_gradient(name=expression(paste("Persons km",NA^-2)),
                      low="darkblue",high="red",na.value="darkblue",
                      limits=c(-1.2,3.5),breaks=c(-1:3),labels=(10^c(-1:3)))+
  theme(legend.position = c(0.85,0.8),axis.ticks = element_blank(),
        axis.text = element_blank(),axis.title = element_blank())
ggsave(plot,filename = paste0(directory,"BufferPopDens.png"),width = 5,height=4,dpi=300,units="in")
plot

intersect=intersect%>%mutate(area_m2 = st_area(.) %>% as.numeric())
intersect$area_km2=intersect$area_m2*(1000^-2)

ggplot(data=intersect)+
  geom_point(aes(y=area_km2,x=Land.area.in.square.kilometres..2016))+
  scale_y_log10()+scale_x_log10()+
  geom_abline(intercept=0,slope=1)

intersect$pop_2016_intersect=intersect$area_km2*intersect$Population.density.per.square.kilometre..2016
paste0(round(sum(intersect$pop_2016_intersect)*10^-6,1)," million people")

## [1] "27.3 million people"

print(paste0(round(100*sum(intersect$pop_2016_intersect,na.rm=TRUE)/sum(census$Population..2016,na.rm=TRUE),1),"% of population within 100 miles of the US border"))

## [1] "77.7% of population within 100 miles of the US border"

What about Canada's population in relation to some US city? Seattle?

lon=seq(-140,-45,2)
df <- data.frame(lon=c(lon,rev(lon)),lat=c(rep(47.6,length(lon)),rep(30,length(lon))))
df <- rbind(df, df[1,])
seattle <- st_sf(st_sfc(st_polygon(list(as.matrix(df)))), crs = 4326)
seattle=st_transform(seattle,proj)

intersect=st_intersection(seattle,CAshp)
intersect=intersect%>%mutate(area_m2 = st_area(.) %>% as.numeric())
intersect$area_km2=intersect$area_m2*(1000^-2)
intersect$pop_2016_intersect=intersect$area_km2*intersect$Population.density.per.square.kilometre..2016


ggplot()+
  geom_sf(data=CAshp,fill="gray",col="transparent")+
  geom_sf(data=intersect,aes(fill=log10(Population.density.per.square.kilometre..2016)),col="transparent")+
  geom_sf(data=seattle,col=2,fill="transparent")+
  coord_sf(xlim = extent(CAshp)[1:2],ylim = extent(CAshp)[3:4])+
  ggtitle("Population of Canada south of Seattle, Washington")+
  scale_fill_gradient(name=expression(paste("Persons km",NA^-2)),
                      low="darkblue",high="red",na.value="darkblue",
                      limits=c(-1.2,3.5),breaks=c(-1:3),labels=(10^c(-1:3)))+
  theme(legend.position = c(0.85,0.8),axis.ticks = element_blank(),
        axis.text = element_blank(),axis.title = element_blank())

paste0(round(sum(intersect$pop_2016_intersect)*10^-6,1)," million people")

## [1] "23.3 million people"

print(paste0(round(100*sum(intersect$pop_2016_intersect,na.rm=TRUE)/sum(census$Population..2016,na.rm=TRUE),1),"% of population south of Seattle, Washington State"))

## [1] "66.2% of population south of Seattle, Washington State"

Tyler B. Hampton

Pages

Population of Canada within 100 miles of the US border, with Open Data and R!

Introduction

Adjust Settings, Load Packages

Download Open-Source data from open.canada.ca

Inspect the Canadian Census data from 2016.

Inspect GIS Data

Inspect Population Density Data

Use st_intersection to extract census tracts

What about Canada's population in relation to some US city? Seattle?

Home

Current Projects

Twitter

Blog Archive

Henry David Thoreau Scholar

Posts

Curriculum Vitae

Google Scholar

ORCID

Tyler Sets Sail!

LinkedIn

eBird