Aggregate clustered OD flows into representative lines
Source: R/linestring_aggregation.R
aggregate_clustered_flows.Rd
This function aggregates flows within clusters and creates a single
representative line for each cluster. The start and end coordinates are
computed as weighted averages (weighted by flow counts or another variable),
or simple means if no weights are provided. Each cluster is represented
by one `LINESTRING`.
Usage
aggregate_clustered_flows(flows, weight = NULL, crs = sf::st_crs(flows))
Arguments
- flows
  An `sf` object containing OD flows with coordinates for origins (`x`, `y`)
  and destinations (`u`, `v`), a `cluster` column, and optionally a `count`
  or other weighting variable.
- weight
  (optional) Name of a column in `flows` to use for weighting. If `NULL`
  (default), unweighted means are used.
- crs
  Coordinate reference system for the output (default: taken from `flows`).
Value
An `sf` object with one line per cluster, containing:
- `count_total`: total weight (if provided), otherwise number of flows
- `size`: the cluster size (from the input, not recomputed)
- `geometry`: a `LINESTRING` representing the aggregated OD flow
Examples
# ----- 1. Basic Usage: A quick, runnable example ---
# This demonstrates the function with minimal, fast data preparation.
flows <- flowcluster::flows_leeds
# Create the required input columns in a single, fast pipeline
flows_clustered <- flows |>
add_xyuv() |>
# Manually create 3 dummy clusters for demonstration
dplyr::mutate(cluster = sample(1:3, size = nrow(flows), replace = TRUE)) |>
# The function requires a 'size' column, so we add it
dplyr::group_by(cluster) |>
dplyr::add_tally(name = "size") |>
dplyr::ungroup()
#> Extracting start and end coordinates from flow geometries...
#> Adding x, y, u, v columns to flow data...
#> Assigning unique flow IDs...
# Demonstrate the function
flows_agg_w <- aggregate_clustered_flows(flows_clustered, weight = "count")
print(flows_agg_w)
#> Simple feature collection with 3 features and 7 fields
#> Geometry type: LINESTRING
#> Dimension: XY
#> Bounding box: xmin: -1.554577 ymin: 53.80406 xmax: -1.547084 ymax: 53.80727
#> Geodetic CRS: WGS 84
#> # A tibble: 3 × 8
#> cluster count_total size x y u v geometry
#> <int> <dbl> <int> <dbl> <dbl> <dbl> <dbl> <LINESTRING [°]>
#> 1 1 52412 3480 -1.55 53.8 -1.55 53.8 (-1.550539 53.80567, -1.549…
#> 2 2 52227 3411 -1.55 53.8 -1.55 53.8 (-1.554577 53.80727, -1.551…
#> 3 3 49308 3405 -1.55 53.8 -1.55 53.8 (-1.549737 53.80646, -1.547…
# ----- 2. Detailed Workflow (not run by default) ---
if (FALSE) { # \dontrun{
# This example shows the ideal end-to-end workflow, from raw data
# to clustering and finally aggregation. It is not run during checks
# because the clustering steps are too slow.
# a) Prepare the data by filtering and adding coordinates
flows_prep <- flowcluster::flows_leeds |>
sf::st_transform(3857) |>
add_flow_length() |>
filter_by_length(length_min = 5000, length_max = 12000) |>
add_xyuv()
# b) Calculate distances and cluster the flows
distances <- flow_distance(flows_prep, alpha = 1.5, beta = 0.5)
dmat <- distance_matrix(distances)
wvec <- weight_vector(dmat, flows_prep, weight_col = "count")
flows_clustered_real <- cluster_flows_dbscan(dmat, wvec, flows_prep, eps = 8, minPts = 70)
# c) Filter clusters and add a 'size' column
flows_clustered_real <- flows_clustered_real |>
dplyr::filter(cluster != 0) |> # Filter out noise points
dplyr::group_by(cluster) |>
dplyr::mutate(size = dplyr::n()) |>
dplyr::ungroup()
# d) Now, use the function on the clustered data
flows_agg_real <- aggregate_clustered_flows(flows_clustered_real, weight = "count")
print(flows_agg_real)
# e) Visualize the results
if (requireNamespace("tmap", quietly = TRUE)) {
library(tmap)
# This plot uses modern tmap v4 syntax.
tm_shape(flows_clustered_real, facet = "cluster") +
tm_lines(col = "grey50", alpha = 0.5) +
tm_shape(flows_agg_real) +
tm_lines(col = "red", lwd = 2) +
tm_layout(title = "Original Flows (Grey) and Aggregated Flows (Red)")
}
} # }