Aggregate clustered OD flows into representative lines
Source: R/linestring_aggregation.R
aggregate_clustered_flows.Rd
This function aggregates flows within clusters and creates a single
representative line for each cluster. The start and end coordinates are
computed as weighted averages (weighted by flow counts or another variable),
or simple means if no weights are provided. Each cluster is represented
by one `LINESTRING`.
Usage
aggregate_clustered_flows(flows, weight = NULL, crs = sf::st_crs(flows))
Arguments
- flows
An `sf` object containing OD flows with coordinates for origins (`x`, `y`)
and destinations (`u`, `v`), a `cluster` column, and optionally a `count`
or other weighting variable.
- weight
(optional) Name of a column in `flows` to use for weighting. If `NULL`
(default), unweighted means are used.
- crs
Coordinate reference system for the output (default: taken from `flows`).
Value
An `sf` object with one line per cluster, containing:
- `count_total`: total weight (if provided), otherwise number of flows
- `size`: the cluster size (from the input, not recomputed)
- `geometry`: a `LINESTRING` representing the aggregated OD flow
Examples
# ----- 1. Prepare the data
flows <- sf::st_transform(flows_leeds, 3857)
# Add flow lengths and coordinates
flows <- add_flow_length(flows)
# filter by length
flows <- filter_by_length(flows, length_min = 5000, length_max = 12000)
#> Flows remaining after filtering: 3237 (31.44%)
flows <- add_xyuv(flows)
#> Extracting start and end coordinates from flow geometries...
#> Adding x, y, u, v columns to flow data...
#> Assigning unique flow IDs...
# Calculate distances
distances <- flow_distance(flows, alpha = 1.5, beta = 0.5)
#> Adding coordinates data back onto the unique pairs ...
dmat <- distance_matrix(distances)
wvec <- weight_vector(dmat, flows, weight_col = "count")
# Cluster flows using DBSCAN
flows_clustered <- cluster_flows_dbscan(dmat, wvec, flows, eps = 8, minPts = 70)
# Filter out noise points and small clusters. Calculate size and count per cluster
flows_clustered <- flows_clustered |>
dplyr::filter(cluster != 0) |> # these are normally the noisepoints
dplyr::group_by(cluster) |>
dplyr::mutate(size = dplyr::n(),
count_cluster = sum(count)) |>
dplyr::ungroup() |>
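# keep the first 9 rows after sorting by cluster size. Note: slice_head(n = 9)
# selects 9 flows (rows), not 9 clusters, so only flows from the largest
# cluster remain (see the single-feature output below)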
dplyr::arrange(dplyr::desc(size)) |>
dplyr::slice_head(n = 9)
# ----- 2. Aggregation code
# Weighted aggregation (by flow counts)
flows_agg_w <- aggregate_clustered_flows(flows_clustered, weight = "count")
head(flows_agg_w)
#> Simple feature collection with 1 feature and 7 fields
#> Geometry type: LINESTRING
#> Dimension: XY
#> Bounding box: xmin: -170927.8 ymin: 7138447 xmax: -161286.4 ymax: 7140008
#> Projected CRS: WGS 84 / Pseudo-Mercator
#> # A tibble: 1 × 8
#> cluster count_total size x y u v
#> <int> <dbl> <int> <dbl> <dbl> <dbl> <dbl>
#> 1 17 77 348 -161286. 7140008. -170928. 7138447.
#> # ℹ 1 more variable: geometry <LINESTRING [m]>
# Unweighted aggregation
flows_agg_uw <- aggregate_clustered_flows(flows_clustered)
head(flows_agg_uw)
#> Simple feature collection with 1 feature and 7 fields
#> Geometry type: LINESTRING
#> Dimension: XY
#> Bounding box: xmin: -170409.4 ymin: 7137591 xmax: -161023.5 ymax: 7140218
#> Projected CRS: WGS 84 / Pseudo-Mercator
#> # A tibble: 1 × 8
#> cluster count_total size x y u v
#> <int> <int> <int> <dbl> <dbl> <dbl> <dbl>
#> 1 17 9 348 -161023. 7140218. -170409. 7137591.
#> # ℹ 1 more variable: geometry <LINESTRING [m]>
# ----- 3. Visualize the output ---
if (requireNamespace("tmap", quietly = TRUE)) {
library(tmap)
tm_shape(flows_clustered) +
tm_lines(
lwd = "count",
col = "grey30",
alpha = 0.7,
title.lwd = "No. of people (Original flows)",
scale = 10,
legend.col.show = FALSE,
showNA = FALSE) +
tm_shape(flows_agg_w) +
tm_lines(
lwd = "count_total",
col = "red",
palette = "Accent", # YlGn
alpha = 1,
title.col = "Cluster",
title.lwd = "No. of people (Representative linestring)",
scale = 10,
legend.outside = TRUE,
legend.outside.position = "bottom") +
tm_layout(
main.title = "Aggregating flows to representative linestrings per cluster",
legend.outside = TRUE,
legend.outside.position = "bottom",
legend.stack = "horizontal",
)
}
#>
#> ── tmap v3 code detected ───────────────────────────────────────────────────────
#> [v3->v4] `tm_lines()`: use `col_alpha` instead of `alpha`.
#> [v3->v4] `tm_lines()`: use `col.legend = tm_legend_hide()` instead of
#> `legend.col.show = FALSE`.
#> [v3->v4] `tm_tm_lines()`: migrate the argument(s) related to the scale of the
#> visual variable `lwd` namely 'scale' (rename to 'values.scale') to lwd.scale =
#> tm_scale_continuous(<HERE>).
#> ℹ For small multiples, specify a 'tm_scale_' for each multiple, and put them in
#> a list: 'lwd.scale = list(<scale1>, <scale2>, ...)'
#> [v3->v4] `tm_lines()`: use `col_alpha` instead of `alpha`.
#> [v3->v4] `tm_lines()`: migrate the argument(s) related to the legend of the
#> visual variable `col` namely 'title.col' (rename to 'title') to 'col.legend =
#> tm_legend(<HERE>)'
#> [tm_lines()] Arguments `legend.outside` and `legend.outside.position` unknown.
#> [v3->v4] `tm_layout()`: use `tm_title()` instead of `tm_layout(main.title = )`
#> The visual variable "lwd" of the layer "lines" contains a unique value. Therefore a discrete scale is applied (tm_scale_discrete).