Snap a point to the closest point on a line segment using sf
Asked Answered
R

4

20

I would like to snap a point to the closest point on a road segment using sf::st_snap. However, the function seems to return the wrong result, it's snapping my point to a point at the start of the road segment. Any ideas on how to fix this?

Reproducible example provided below, including a comparision of the results I get when using sf::st_snap vs maptools::snapPointsToLines

Using sf::st_snap

# Max distance
  cut_dist = 200 # meters

# snap points to closest road
  new_point <- sf::st_snap(point1, roads, tolerance = cut_dist)

# view points on the map
  mapView(point1, color="red") + mapView( st_buffer(point1, dist = cut_dist)) + mapView(new_point) + mapView(roads) 

# Distance between pont1 and new_point
  st_distance( point1, new_point)
> 1591 meters # note this is above the set maximun distance

enter image description here

Using maptools::snapPointsToLines (the result I would expect)

# convert sf to sp
  point_sp <-  as_Spatial(point1)
  roads_sp <-  as_Spatial(roads)

# snap points
  new_point_sp <- snapPointsToLines(point_sp, roads_sp, maxDist = cut_dist)

# view points on the map
  mapView(point1, color="red") + mapView( st_buffer(point1, dist = cut_dist)) + mapView(new_point_sp) + mapView(roads) 

# Distance between pont1 and new_point
  spDistsN1( point_sp, new_point_sp)
>  116 meters

enter image description here

Data and libraries

library(sf)
library(mapview)
library(maptools)
library(sp)

point1 <- structure(list(idhex = 9L, geometry = structure(list(structure(c(665606.970079183, 
          6525003.41418009), class = c("XY", "POINT", "sfg"))), class = c("sfc_POINT", 
          "sfc"), precision = 0, bbox = structure(c(xmin = 665606.970079183, 
          ymin = 6525003.41418009, xmax = 665606.970079183, ymax = 6525003.41418009
          ), class = "bbox"), crs = structure(list(epsg = 32633L, proj4string = "+proj=utm +zone=33 +datum=WGS84 +units=m +no_defs"), class = "crs"), n_empty = 0L)), sf_column = "geometry", agr = structure(c(idhex = NA_integer_), .Label = c("constant", 
                                                                                                                                                         "aggregate", "identity"), class = "factor"), row.names = 2L, class = c("sf", 
                                                                                                                                                                                                                                                                                                                                                                                                                                                              "data.table", "data.frame"))


   roads <- structure(list(id = 139885, osm_id = 250886593, geometry = structure(list(
        structure(c(665387.589147313, 665367.867159783, 665363.008143169, 
        665363.051860059, 665363.308104069, 665366.519781353, 665368.635421323, 
        665370.846894641, 665370.829724196, 665367.910645335, 665361.777524054, 
        665355.967776345, 665351.649946698, 665343.44353825, 665334.917779131, 
        665313.306069501, 665309.001351385, 665310.66019677, 665313.528620709, 
        665341.742306731, 665351.854389331, 665354.981775569, 665360.254611229, 
        665365.006104512, 665379.034588542, 665394.435039616, 665409.282519288, 
        665410.676785182, 665448.890797438, 665458.917562631, 665471.042094353, 
        665485.485001236, 665495.899212422, 665504.535684257, 665509.674854913, 
        665506.145837246, 665483.727146874, 665481.426949686, 665462.311063365, 
        665445.215460573, 665450.424049663, 665450.837897892, 665491.036360788, 
        665491.419140717, 665469.507518623, 665458.677850808, 665455.926197775, 
        665462.873809047, 665460.283684337, 665426.046702616, 665396.279686035, 
        665368.373253059, 665357.878521323, 665304.347529357, 665221.04051616, 
        665170.777462125, 665144.670345016, 665106.030568334, 665073.2789218, 
        665018.208956171, 664947.693178271, 664921.708297412, 664861.659061389, 
        664797.900403384, 664745.001666066, 664730.200174759, 664717.892651619, 
        664706.473711845, 664697.750102392, 664688.215719591, 664681.544531593, 
        664672.960647368, 664665.064067202, 664636.446517023, 664622.930521655, 
        664518.065243846, 664442.725560545, 664423.048166559, 664411.132259582, 
        664407.05972929, 664398.364646172, 664391.348502443, 664382.558239303, 
        664372.012526058, 664354.354954718, 664332.995014599, 664311.609706282, 
        664271.102641808, 664228.816287751, 664150.088321471, 664069.895400484, 
        6526138.02793883, 6526135.40749336, 6526130.11578605, 6526111.34403368, 
        6526087.4978365, 6526054.13445288, 6526022.49962268, 6525982.74830288, 
        6525959.40435839, 6525944.55197219, 6525918.33886077, 6525894.18611795, 
        6525874.55473851, 6525840.53410542, 6525813.96628006, 6525767.42907088, 
        6525745.21917638, 6525733.51582599, 6525713.24841331, 6525627.57847652, 
        6525608.06984863, 6525568.30170735, 6525550.71644271, 6525539.76231607, 
        6525491.25651378, 6525446.12690364, 6525433.36256694, 6525431.23562504, 
        6525372.98235432, 6525354.13376808, 6525331.3288195, 6525309.59511696, 
        6525293.92174422, 6525270.21980161, 6525256.11455612, 6525228.35885783, 
        6525217.10943051, 6525215.95489587, 6525195.91355696, 6525158.79257025, 
        6525134.01851773, 6525131.70940566, 6525050.96446632, 6524950.68358502, 
        6524851.23226232, 6524806.24052727, 6524749.34394609, 6524714.63617193, 
        6524660.07336072, 6524612.21010524, 6524583.84484865, 6524562.03540982, 
        6524557.38094998, 6524533.67136837, 6524510.74454804, 6524495.56823805, 
        6524486.9387399, 6524475.63373441, 6524465.4404841, 6524468.04929815, 
        6524475.95178632, 6524478.86036788, 6524470.76472937, 6524447.96214429, 
        6524448.06967557, 6524443.4855897, 6524435.86812114, 6524425.93373791, 
        6524417.67487537, 6524409.79262886, 6524399.64960133, 6524378.79085156, 
        6524360.33496349, 6524303.24355601, 6524302.70486651, 6524293.01335665, 
        6524290.81442892, 6524298.30279414, 6524309.46697681, 6524313.27442914, 
        6524337.22831533, 6524364.43083297, 6524376.27944935, 6524382.92319852, 
        6524389.6474774, 6524406.74565716, 6524430.82326744, 6524462.46041311, 
        6524492.20009833, 6524544.74318075, 6524591.10483188), .Dim = c(91L, 
        2L), class = c("XY", "LINESTRING", "sfg"))), class = c("sfc_LINESTRING", 
        "sfc"), precision = 0, bbox = structure(c(xmin = 664069.895400484, 
        ymin = 6524290.81442892, xmax = 665509.674854913, ymax = 6526138.02793883
        ), class = "bbox"), crs = structure(list(epsg = 32633L, proj4string = "+proj=utm +zone=33 +datum=WGS84 +units=m +no_defs"), class = "crs"), n_empty = 0L)), row.names = 139885L, class = c("sf", 
        "data.frame"), sf_column = "geometry", agr = structure(c(id = NA_integer_, 
        osm_id = NA_integer_), .Label = c("constant", "aggregate", "identity"
        ), class = "factor"))
Roulers answered 11/7, 2018 at 19:29 Comment(0)
S
22

I don't know why st_snap returns the start/end point of the linestring. Maybe that is something for an issue at https://github.com/r-spatial/sf.

Here's a workaround that seems to identify the correct point. Note that st_nearest_points was only introduced recently, so you may need to install sf from github.

nrst = st_nearest_points(point1, roads)
new_point2 = st_cast(nrst, "POINT")[2]

mapView(point1, color="red") + st_buffer(point1, dist = cut_dist) + new_point2 + roads

Wrapping this is a function to return a new geometry set with snapped points below a certain max_dist:

library(sf)
library(mapview)

st_snap_points = function(x, y, max_dist = 1000) {

  if (inherits(x, "sf")) n = nrow(x)
  if (inherits(x, "sfc")) n = length(x)

  out = do.call(c,
                lapply(seq(n), function(i) {
                  nrst = st_nearest_points(st_geometry(x)[i], y)
                  nrst_len = st_length(nrst)
                  nrst_mn = which.min(nrst_len)
                  if (as.vector(nrst_len[nrst_mn]) > max_dist) return(st_geometry(x)[i])
                  return(st_cast(nrst[nrst_mn], "POINT")[2])
                })
  )
  return(out)
}

brew = st_transform(breweries, st_crs(trails))

tst = st_snap_points(brew, trails, 500)

mapview(breweries) + mapview(tst, col.regions = "red") + trails
Superfetation answered 12/7, 2018 at 7:47 Comment(7)
Hi @TimSalabim. This is a nice workaround (thanks!) but it does not have the a similar tolerance attribute to set a maximum distance.Roulers
True. Though st_nearest_points returns a linestring which you could measure with st_length and compare to your desired tolerance.Superfetation
I've expanded your answer creating a function that sets a maximum distance tolerance. As it stands, however, it works to snap a single point but it doesn't work to snap multiple points of a single sf data.frameRoulers
Your function wasn't working! Hence, edited the answerSuperfetation
This looks much better. Thanks. Perhaps you wanna suggest this function to be incorporated in the sf library github.com/r-spatial/sf/issues/792Roulers
Over two years later and this hasn't been implemented into the standard sf package??? I followed the github discussion and I don't understand why it was the suggestion was rejected. Thanks for the function, it works perfectly!Cottrell
Great function, nice workPerusal
P
11

Given that this was answered almost four years ago, I thought I'd add an alternative and faster method which may not have been possible at the time. @TimSalabim's st_snap_points() function is great, but if you've got an sf collection with a few points, it can be done a bit faster using st_nearest_points() directly on the sf collection.

As mentioned by Tim and others in the discussion, st_nearest_points() returns a LINESTRING object - which is kinda confusing at first but can be easily turned into a POINT. It sounds like the order of the points in the LINESTRING always goes from the first to the second geometry, so you can pick out either the start or endpoint of the LINESTRING, depending which order you put the geometries into st_nearest_points().

See ?st_nearest_points:

Value

an sfc object with all two-point LINESTRING geometries of point pairs from the first to the second geometry, of length x * y, with y cycling fastest. See examples for ideas how to convert these to POINT geometries.

Using your example, we can find the closest point on the road by making the road the second object, and retrieving the endpoint (second point) of the LINESTRING as follows:

# find the LINESTRING object which is the shortest line between the two geometries
st_nearest_points(point1, roads) %>% 
  {. ->> my_linestring}

my_linestring

# Geometry set for 1 feature 
# Geometry type: LINESTRING
# Dimension:     XY
# Bounding box:  xmin: 665491.2 ymin: 6525003 xmax: 665607 ymax: 6525003
# Projected CRS: WGS 84 / UTM zone 33N
# LINESTRING (665607 6525003, 665491.2 6525003)
  

# then, convert the LINESTRING to POINTs
#    and, pull out the second point, because we want the point on the 'roads' object, 
#    which was supplied second to st_nearest_points()
my_linestring %>% 
  st_cast('POINT') %>% 
  .[2] %>%
  {. ->> closest_point}

closest_point

# Geometry set for 1 feature 
# Geometry type: POINT
# Dimension:     XY
# Bounding box:  xmin: 665491.2 ymin: 6525003 xmax: 665491.2 ymax: 6525003
# Projected CRS: WGS 84 / UTM zone 33N
# POINT (665491.2 6525003)

And we can look at it on a map to confirm:

library(tidyverse)

ggplot()+
  geom_sf(data = roads, col = 'red')+
  geom_sf(data = point1, col = 'black')+
  geom_sf(data = closest_point, col = 'blue')

enter image description here

To find the distance between the points we use st_distance():

# work out the distance
st_distance(point1, closest_point)[1]
# 115.7514 [m]

To incorporate a distance threshold into the "snapping", we can make a function which includes an if statement:

# we can combine the code and snap the point conditionally
f1 <- function(x, y, max_dist){
  
  st_nearest_points(x, y) %>% 
    {. ->> my_linestring} %>% 
    st_cast('POINT') %>% 
    .[2] %>%
    {. ->> closest_point} %>% 
    st_distance(., x) %>% 
    as.numeric %>% 
    {. ->> my_distance}
  
  if(my_distance <= max_dist){
    
    return(closest_point)
    
  } else {
      
    return(st_geometry(point1))
    
  }
  
}

# run the function with threshold of 400 m (it should snap)
f1(point1, roads, max_dist = 400) %>% 
  {. ->> p1_400}

# run the function with a threshold of 50 m (it should NOT snap)
f1(point1, roads, max_dist = 50) %>% 
  {. ->> p1_50}

# plot it to see the results
ggplot()+
  geom_sf(data = roads, col = 'red')+
  geom_sf(data = p1_400, col = 'blue')+
  geom_sf(data = p1_50)
# the blue point is on the road because it was within the threshold
# the black point is in the original spot because it was outside the threshold

enter image description here



Working on an sf collection rather than a single point

Now, I'll assume that many instances will require this to be done on multiple points rather than a single point, and we can modify it slightly to use this approach on an sf collection with multiple points. To demonstrate this, first make up 100 random points within the bbox of roads:

set.seed(69)

# let's make some random points around the road
tibble(
  x = runif(n = 100, min = st_bbox(roads)$xmin, max = st_bbox(roads)$xmax),
  y = runif(n = 100, min = st_bbox(roads)$ymin, max = st_bbox(roads)$ymax), 
  id = 1:100
) %>% 
  st_as_sf(
    x = ., 
    coords = c('x', 'y'), 
    crs = st_crs(roads)
  ) %>% 
  {. ->> my_points}

my_points

# Simple feature collection with 100 features and 1 field
# Geometry type: POINT
# Dimension:     XY
# Bounding box:  xmin: 664090.7 ymin: 6524310 xmax: 665500.6 ymax: 6526107
# Projected CRS: WGS 84 / UTM zone 33N
# # A tibble: 100 x 2
#      id           geometry
# * <int>        <POINT [m]>
# 1     1 (664834.1 6525742)
# 2     2 (665176.8 6524370)
# 3     3 (664999.9 6525528)
# 4     4 (665315.7 6525242)
# 5     5   (664601 6525464)
# 6     6 (665320.7 6525600)
# 7     7 (664316.2 6525065)
# 8     8   (665204 6526025)
# 9     9 (664319.8 6524335)
# 10    10 (664101.7 6525830)
# # ... with 90 more rows


# now show the points on a figure
ggplot()+
  geom_sf(data = roads, col = 'red')+
  geom_sf(data = my_points, shape = 1)

enter image description here

To find the closest point on roads to each of the random points, we use the same functions as the single point example above, but we save the LINESTRING as a column for each point. Then, instead of retrieving just the second point of the LINESTRING, we pull out every second point of the collection (column) of LINETRINGs. You have the option of pulling out the second point of every LINESTRING on a row-by-row basis, by including rowwise() before mutate(), but this makes for much slower computation. Instead, by working on the sf collection as a whole and pulling out every second point (the second point from each row), you get the result much much faster. This is how they demonstrate it in the example of ?st_nearest_points.

So basically, we make new columns in the sf collection using mutate() which includes the LINESTRING and the closest POINT.

# now, let's find the closest points
my_points %>% 
  mutate(
    my_linestring = st_nearest_points(geometry, roads),
    closest_point = st_cast(my_linestring, 'POINT')[seq(2, nrow(.)*2, 2)],
  ) %>% 
  {. ->> closest_points}

closest_points

# Simple feature collection with 100 features and 1 field
# Active geometry column: geometry
# Geometry type: POINT
# Dimension:     XY
# Bounding box:  xmin: 664090.7 ymin: 6524310 xmax: 665500.6 ymax: 6526107
# Projected CRS: WGS 84 / UTM zone 33N
# # A tibble: 100 x 4
#      id           geometry                        my_linestring      closest_point
# * <int>        <POINT [m]>                     <LINESTRING [m]>        <POINT [m]>
# 1     1 (664834.1 6525742)   (664834.1 6525742, 665309 6525745)   (665309 6525745)
# 2     2 (665176.8 6524370) (665176.8 6524370, 665142.8 6524486) (665142.8 6524486)
# 3     3 (664999.9 6525528) (664999.9 6525528, 665337.9 6525639) (665337.9 6525639)
# 4     4 (665315.7 6525242) (665315.7 6525242, 665454.2 6525178) (665454.2 6525178)
# 5     5   (664601 6525464)   (664601 6525464, 665317.8 6525700) (665317.8 6525700)
# 6     6 (665320.7 6525600) (665320.7 6525600, 665348.4 6525615) (665348.4 6525615)
# 7     7 (664316.2 6525065) (664316.2 6525065, 664069.9 6524591) (664069.9 6524591)
# 8     8   (665204 6526025)   (665204 6526025, 665367.7 6526036) (665367.7 6526036)
# 9     9 (664319.8 6524335) (664319.8 6524335, 664354.4 6524390) (664354.4 6524390)
# 10    10 (664101.7 6525830)   (664101.7 6525830, 665309 6525745)   (665309 6525745)
# # ... with 90 more rows


# and have a look at a figure
ggplot()+
  geom_sf(data = roads, col = 'red')+
  geom_sf(data = my_points, shape = 1)+
  geom_sf(data = closest_points$my_linestring, linetype = 'dotted')+
  geom_sf(data = closest_points$closest_point, shape = 1, col = 'blue')

enter image description here

If you want to include a distance threshold in the analysis, we calculate the distance between the points (or length of my_linestring), and create a new geometry column, conditionally containing either the original geometry or the new snapped point, depending on distance (see here). We use ifelse() and a threshold of 400 m.

# now, use the closest_point if the distance is within a threshold, otherwise keep the original point
closest_points %>% 
  mutate(
    distance = st_distance(geometry, roads)[,1],
    distance2 = st_length(my_linestring), # not used, demo purposes only
    snapped_point_cond = st_sfc(ifelse(as.numeric(distance) <= 400, st_geometry(closest_point), geometry), crs = st_crs(roads))
  ) %>% 
  {. ->> closest_points_threshold}

closest_points_threshold

# Simple feature collection with 100 features and 3 fields
# Active geometry column: geometry
# Geometry type: POINT
# Dimension:     XY
# Bounding box:  xmin: 664090.7 ymin: 6524310 xmax: 665500.6 ymax: 6526107
# Projected CRS: WGS 84 / UTM zone 33N
# # A tibble: 100 x 7
#      id           geometry                        my_linestring      closest_point distance distance2 snapped_point_cond
# * <int>        <POINT [m]>                     <LINESTRING [m]>        <POINT [m]>      [m]       [m]        <POINT [m]>
# 1     1 (664834.1 6525742)   (664834.1 6525742, 665309 6525745)   (665309 6525745)    475.      475.  (664834.1 6525742)
# 2     2 (665176.8 6524370) (665176.8 6524370, 665142.8 6524486) (665142.8 6524486)    121.      121.  (665142.8 6524486)
# 3     3 (664999.9 6525528) (664999.9 6525528, 665337.9 6525639) (665337.9 6525639)    356.      356.  (665337.9 6525639)
# 4     4 (665315.7 6525242) (665315.7 6525242, 665454.2 6525178) (665454.2 6525178)    153.      153.  (665454.2 6525178)
# 5     5   (664601 6525464)   (664601 6525464, 665317.8 6525700) (665317.8 6525700)    755.      755.    (664601 6525464)
# 6     6 (665320.7 6525600) (665320.7 6525600, 665348.4 6525615) (665348.4 6525615)     31.2      31.2 (665348.4 6525615)
# 7     7 (664316.2 6525065) (664316.2 6525065, 664069.9 6524591) (664069.9 6524591)    534.      534.  (664316.2 6525065)
# 8     8   (665204 6526025)   (665204 6526025, 665367.7 6526036) (665367.7 6526036)    164.      164.  (665367.7 6526036)
# 9     9 (664319.8 6524335) (664319.8 6524335, 664354.4 6524390) (664354.4 6524390)     64.5      64.5 (664354.4 6524390)
# 10    10 (664101.7 6525830)   (664101.7 6525830, 665309 6525745)   (665309 6525745)   1210.     1210.  (664101.7 6525830)
# # ... with 90 more rows


# and plot it
ggplot()+
  geom_sf(data = roads %>% st_buffer(400), col = 'red', fill = NA)+
  geom_sf(data = roads, col = 'red')+
  geom_sf(data = closest_points_threshold$snapped_point_cond, shape = 1, col = 'blue')+
  geom_sf(data = closest_points_threshold %>% filter(as.numeric(distance) <= 400) %>% .$my_linestring, linetype = 'dotted')+
  geom_sf(data = closest_points_threshold %>% filter(as.numeric(distance) <= 400), shape = 1)

enter image description here

So, blue points are the final threshold-snapped points. As we can see, points outside the 400 m buffer (red polygon) remain in their original position and anything inside (black points and dotted lines) is snapped to the road.



Comparing st_nearest_points() to @TimSalabim's st_snap_points()

Time-wise, the st_nearest_points() method seems to be faster than st_snap_points(). As far as I can gather (and this is not my strong suit), this is because st_snap_points() works on a point-by-point basis, or row-by-row basis when used on an sf collection, whereas we are able to work on the entire collection of LINESTRINGs at once using st_nearest_points().

For the example of 100 points, we are talking <0.2 secs vs ~11 secs.

Additionally, if we try a point-by-point (rowwise()) approach to st_nearest_points(), it is the slowest of all three approaches, coming it at ~15 secs.

#~~~~~ st_nearest_points() ~~~~~

system.time(
  my_points %>% 
    mutate(
      my_linestring = st_nearest_points(geometry, roads),
      closest_point = st_cast(my_linestring, 'POINT')[seq(2, nrow(.)*2, 2)],
      distance = st_distance(geometry, roads)[,1],
      snapped_point_cond = st_sfc(ifelse(as.numeric(distance) <= 400, st_geometry(closest_point), geometry), crs = st_crs(roads))
    ) %>% 
    {. ->> closest_points_f1}
)
# <0.2 secs


#~~~~~ st_snap_points() ~~~~~

st_snap_points = function(x, y, max_dist = 1000) {
  
  if (inherits(x, "sf")) n = nrow(x)
  if (inherits(x, "sfc")) n = length(x)
  
  out = do.call(c,
                lapply(seq(n), function(i) {
                  nrst = st_nearest_points(st_geometry(x)[i], y)
                  nrst_len = st_length(nrst)
                  nrst_mn = which.min(nrst_len)
                  if (as.vector(nrst_len[nrst_mn]) > max_dist) return(st_geometry(x)[i])
                  return(st_cast(nrst[nrst_mn], "POINT")[2])
                })
  )
  return(out)
}

system.time(
  st_snap_points(my_points, roads, max_dist = 400) %>% 
    {. ->> closest_points_f2}
)
# 11 secs


#~~~~~ st_nearest_points() rowwise ~~~~~

system.time(
  my_points %>% 
    rowwise %>% 
    mutate(
      my_linestring = st_nearest_points(geometry, roads),
      closest_point = st_cast(my_linestring, 'POINT')[2], # pull out the second point of each row
      distance = st_distance(geometry, roads)[,1],
      snapped_point_cond = st_sfc(ifelse(as.numeric(distance) <= 400, st_geometry(closest_point), geometry), crs = st_crs(roads))
    ) %>% 
    {. ->> closest_points_f1_rowwise}
)
# 15 secs

All three methods produce the same final figure:

ggplot()+
  geom_sf(data = roads, col = 'red')+
  geom_sf(data = closest_points_f1$snapped_point_cond, shape = 1, col = 'blue')

enter image description here

However, the st_nearest_points() method(s) returns an sf collection with all the working out columns, whereas st_snap_points() only produces the final snapped points. I reckon having all the original and working columns is pretty useful, especially for troubleshooting. Plus, this method is significantly faster.

closest_points_f1

# Simple feature collection with 100 features and 2 fields
# Active geometry column: geometry
# Geometry type: POINT
# Dimension:     XY
# Bounding box:  xmin: 664090.7 ymin: 6524310 xmax: 665500.6 ymax: 6526107
# Projected CRS: WGS 84 / UTM zone 33N
# # A tibble: 100 x 6
#      id           geometry                        my_linestring      closest_point distance snapped_point_cond
# * <int>        <POINT [m]>                     <LINESTRING [m]>        <POINT [m]>      [m]        <POINT [m]>
# 1     1 (664834.1 6525742)   (664834.1 6525742, 665309 6525745)   (665309 6525745)    475.  (664834.1 6525742)
# 2     2 (665176.8 6524370) (665176.8 6524370, 665142.8 6524486) (665142.8 6524486)    121.  (665142.8 6524486)
# 3     3 (664999.9 6525528) (664999.9 6525528, 665337.9 6525639) (665337.9 6525639)    356.  (665337.9 6525639)
# 4     4 (665315.7 6525242) (665315.7 6525242, 665454.2 6525178) (665454.2 6525178)    153.  (665454.2 6525178)
# 5     5   (664601 6525464)   (664601 6525464, 665317.8 6525700) (665317.8 6525700)    755.    (664601 6525464)
# 6     6 (665320.7 6525600) (665320.7 6525600, 665348.4 6525615) (665348.4 6525615)     31.2 (665348.4 6525615)
# 7     7 (664316.2 6525065) (664316.2 6525065, 664069.9 6524591) (664069.9 6524591)    534.  (664316.2 6525065)
# 8     8   (665204 6526025)   (665204 6526025, 665367.7 6526036) (665367.7 6526036)    164.  (665367.7 6526036)
# 9     9 (664319.8 6524335) (664319.8 6524335, 664354.4 6524390) (664354.4 6524390)     64.5 (664354.4 6524390)
# 10    10 (664101.7 6525830)   (664101.7 6525830, 665309 6525745)   (665309 6525745)   1210.  (664101.7 6525830)
# # ... with 90 more rows

closest_points_f2

# Geometry set for 100 features 
# Geometry type: POINT
# Dimension:     XY
# Bounding box:  xmin: 664069.9 ymin: 6524380 xmax: 665480.3 ymax: 6526107
# Projected CRS: WGS 84 / UTM zone 33N
# First 5 geometries:
# POINT (664834.1 6525742)
# POINT (665142.8 6524486)
# POINT (665337.9 6525639)
# POINT (665454.2 6525178)
# POINT (664601 6525464)

I ran the example again with 1000 points, and st_neareast_points() took ~0.31 secs (1.5x longer), and st_snap_points() took 120 secs (~10x longer). I also ran st_nearest_points() on 10,000 points and it took ~2.5 secs (12x longer than 100 points; 8x longer than 1000 points). I did not run st_snap_points() on 10,000 rows.

Perusal answered 17/5, 2022 at 2:11 Comment(1)
This is a very thorough and much appreciated answer. Upvoted !Roulers
C
2

I had a similar problem, but where I had many points and also many line features. That is, I needed to find the nearest geometry point contained within a collection of line features for each point within my point feature collection.

Expanding on the brilliantly thorough answer by hugh-allan, I found a minimally modified solution that allows for this.

Example Data and Libraries

For the example, we'll use the randomly created points from hugh-allen, and the original roads (albeit defined differently below to match current sf expectations re: crs), but we'll add a second line feature to roads, which is simply the original road X coordinates offset to the west and north by 500 meters.

library(sf)
library(dplyr)
library(ggplot2)

##  hugh-allen's random points
set.seed(69)
tibble(
  x = runif(n = 100, min = st_bbox(roads)$xmin, max = st_bbox(roads)$xmax),
  y = runif(n = 100, min = st_bbox(roads)$ymin, max = st_bbox(roads)$ymax), 
  id = 1:100
) %>% 
  st_as_sf(
    x = ., 
    coords = c('x', 'y'), 
    crs = st_crs(roads)
  ) %>% 
  {. ->> my_points}

roads <- matrix(data = c(665387.589147313, 665367.867159783, 665363.008143169, 
                         665363.051860059, 665363.308104069, 665366.519781353, 665368.635421323, 
                         665370.846894641, 665370.829724196, 665367.910645335, 665361.777524054, 
                         665355.967776345, 665351.649946698, 665343.44353825, 665334.917779131, 
                         665313.306069501, 665309.001351385, 665310.66019677, 665313.528620709, 
                         665341.742306731, 665351.854389331, 665354.981775569, 665360.254611229, 
                         665365.006104512, 665379.034588542, 665394.435039616, 665409.282519288, 
                         665410.676785182, 665448.890797438, 665458.917562631, 665471.042094353, 
                         665485.485001236, 665495.899212422, 665504.535684257, 665509.674854913, 
                         665506.145837246, 665483.727146874, 665481.426949686, 665462.311063365, 
                         665445.215460573, 665450.424049663, 665450.837897892, 665491.036360788, 
                         665491.419140717, 665469.507518623, 665458.677850808, 665455.926197775, 
                         665462.873809047, 665460.283684337, 665426.046702616, 665396.279686035, 
                         665368.373253059, 665357.878521323, 665304.347529357, 665221.04051616, 
                         665170.777462125, 665144.670345016, 665106.030568334, 665073.2789218, 
                         665018.208956171, 664947.693178271, 664921.708297412, 664861.659061389, 
                         664797.900403384, 664745.001666066, 664730.200174759, 664717.892651619, 
                         664706.473711845, 664697.750102392, 664688.215719591, 664681.544531593, 
                         664672.960647368, 664665.064067202, 664636.446517023, 664622.930521655, 
                         664518.065243846, 664442.725560545, 664423.048166559, 664411.132259582, 
                         664407.05972929, 664398.364646172, 664391.348502443, 664382.558239303, 
                         664372.012526058, 664354.354954718, 664332.995014599, 664311.609706282, 
                         664271.102641808, 664228.816287751, 664150.088321471, 664069.895400484, 
                         6526138.02793883, 6526135.40749336, 6526130.11578605, 6526111.34403368, 
                         6526087.4978365, 6526054.13445288, 6526022.49962268, 6525982.74830288, 
                         6525959.40435839, 6525944.55197219, 6525918.33886077, 6525894.18611795, 
                         6525874.55473851, 6525840.53410542, 6525813.96628006, 6525767.42907088, 
                         6525745.21917638, 6525733.51582599, 6525713.24841331, 6525627.57847652, 
                         6525608.06984863, 6525568.30170735, 6525550.71644271, 6525539.76231607, 
                         6525491.25651378, 6525446.12690364, 6525433.36256694, 6525431.23562504, 
                         6525372.98235432, 6525354.13376808, 6525331.3288195, 6525309.59511696, 
                         6525293.92174422, 6525270.21980161, 6525256.11455612, 6525228.35885783, 
                         6525217.10943051, 6525215.95489587, 6525195.91355696, 6525158.79257025, 
                         6525134.01851773, 6525131.70940566, 6525050.96446632, 6524950.68358502, 
                         6524851.23226232, 6524806.24052727, 6524749.34394609, 6524714.63617193, 
                         6524660.07336072, 6524612.21010524, 6524583.84484865, 6524562.03540982, 
                         6524557.38094998, 6524533.67136837, 6524510.74454804, 6524495.56823805, 
                         6524486.9387399, 6524475.63373441, 6524465.4404841, 6524468.04929815, 
                         6524475.95178632, 6524478.86036788, 6524470.76472937, 6524447.96214429, 
                         6524448.06967557, 6524443.4855897, 6524435.86812114, 6524425.93373791, 
                         6524417.67487537, 6524409.79262886, 6524399.64960133, 6524378.79085156, 
                         6524360.33496349, 6524303.24355601, 6524302.70486651, 6524293.01335665, 
                         6524290.81442892, 6524298.30279414, 6524309.46697681, 6524313.27442914, 
                         6524337.22831533, 6524364.43083297, 6524376.27944935, 6524382.92319852, 
                         6524389.6474774, 6524406.74565716, 6524430.82326744, 6524462.46041311, 
                         6524492.20009833, 6524544.74318075, 6524591.10483188),
                ncol= 2) %>%
  as.data.frame() %>%
  rename(X = names(.)[1],
         Y = names(.)[2]) %>%
  mutate(id = 139885) %>%
  rbind(data.frame("id" = 22222,
                   "X" = .$X-500,
                   "Y" = .$Y+500)) %>%
  st_as_sf(coords = c("X","Y")) %>%
  st_set_crs(value = 32633) %>%
  group_by(id) %>%
  summarise(do_union = FALSE) %>%
  st_cast("LINESTRING") %>%
  st_as_sf()

ggplot()+
  geom_sf(data = roads, col = 'red')+
  geom_sf(data = my_points, shape = 1)

Example image of the new roads data with points

We can then run the same solution, but with an added call to st_nearest_segment() to ID which of the multiple line features is the nearest, and thus narrowing the problem space for each point feature when calling st_nearest_points().

closest_points <- my_points %>% 
  rowwise() %>%
  mutate(
    ##  Get the nearest river segment linestring:
    nearest_segment = roads[st_nearest_feature(geometry, 
                                               roads),],
    ## Get the linestrings between each point and the closest segment:
    line_to_point = st_nearest_points(geometry, nearest_segment),
    ##  Retrieve the point from the line sf that was returned:
    closest_point = st_cast(line_to_point, 'POINT')[2],
    ##  Calculate the distance between the old and new point:     
    distance = st_distance(geometry, closest_point)[,1],
    ##  If under our limit of 100m distant, adopt the new geometry, 
    ##  else keep the original
    snapped_point_cond = st_sfc(ifelse(as.numeric(distance) <= 400, 
                                       st_geometry(closest_point),
                                       geometry), 
                                crs = st_crs(roads)))

We can see how the points are offset correctly for each nearest line feature.

ggplot()+
  geom_sf(data = roads %>% st_buffer(400), 
          col = 'red', 
          fill = NA)+
  geom_sf(data = roads, 
          col = 'red')+
  geom_sf(data = closest_points$snapped_point_cond,
          shape = 1, 
          col = 'blue')+
  geom_sf(data = closest_points %>% 
            filter(as.numeric(distance) <= 400) %>% 
            .$line_to_point, 
          linetype = 'dotted')+
  geom_sf(data = closest_points %>%
            filter(as.numeric(distance) <= 400),
          shape = 1)

Offset plot of identified nearest line feature vertex

And finally plot the new "snapped" points.

ggplot()+
  geom_sf(data = roads, col = 'red')+
  geom_sf(data = closest_points$snapped_point_cond, shape = 1, col = 'blue')

Plot of final "snapped" points

Cynic answered 5/3, 2023 at 11:49 Comment(0)
S
1

I tried both solutions provided above and found they are not suitable for me. The first one takes too much time; the second one takes too much memory. I provide a solution that should be both faster and less memory expensive. It has been made possible by the advancement of SF.

My approach is as follows:

  1. I create a buffer around the lines (roads) and join it to one shape (with a union).
  2. I throw away all points that are not inside the buffer.
  3. For each point, I find the closest feature.
  4. For each point and its closest feature, I find the closest point.
  5. I add points attributes and add the points that weren't inside the buffer.

The simple code (working for sf only) is here. Beware, it may add a column named "valid," which may cripple the original data.

require(sf)
require(dplyr)

buffer_lines <- function(lines, dist = 100, verbose = FALSE) {
    if (verbose) message("Creating buffer for lines...")
    st_buffer(lines, dist = dist) |> st_union()
}

get_points_close_to_lines <- function(points, lines, dist = 100,
                                      verbose = FALSE) {
    buff <- buffer_lines(lines, dist = dist, verbose = verbose)
    if (verbose) message("Finding which points are in buffer...")
    st_intersects(points, buff, sparse = FALSE)
}

snap_points_to_lines <- function(points, lines, dist = 100,
                                 all_points = FALSE, verbose = FALSE) {
    inside <- get_points_close_to_lines(points, lines, dist = dist,
                                        verbose = verbose)
    buff_points <- points[inside, ]
    if (verbose) message("Finding nearest features...")
    nf <- st_nearest_feature(buff_points, lines)
    if (verbose) message("Finding nearest points...")
    np <- st_nearest_points(buff_points, lines[nf, ], pairwise = TRUE)
    np <- st_cast(np, "POINT")[c(FALSE, TRUE)]
    if (verbose) message("Adding attributes...")
    out <- st_drop_geometry(buff_points)
    out$geometry <- np
    out <- st_as_sf(out)
    if (all_points) {
        if (verbose) message("Adding points outside buffer...")
        out$valid <- TRUE
        outside <- points[!inside, ]
        outside$valid <- FALSE
        out <- dplyr::bind_rows(out, outside)
    }
    out
}

Sosna answered 31/5, 2022 at 10:27 Comment(0)

© 2022 - 2025 — McMap. All rights reserved.