I had the same problem and came up with this solution. The function does the calculation in about 11 seconds (Intel i5, 50,000 random rectangles). It evaluates the comparisons with numexpr rather than plain NumPy, which is about 5 times faster, and it builds temporary lookup tables of precomputed comparison masks.
import pandas as pd
import numpy as np
import numexpr
def _threshold_masks(
    thresholds: np.ndarray, values: np.ndarray, op: str
) -> tuple[list[np.ndarray], list[int]]:
    """Precompute ``values <op> t`` for every distinct ``t`` in *thresholds*.

    Returns ``(masks, position)`` where ``masks[position[i]]`` is the boolean
    mask belonging to ``thresholds[i]``.
    """
    distinct, position = np.unique(thresholds, return_inverse=True)
    masks = [
        # The threshold is passed as a variable rather than formatted into the
        # expression text: the expression is then compiled once and the value
        # keeps its exact dtype (no float32 -> text -> float64 round trip).
        numexpr.evaluate(
            f"col {op} bound",
            global_dict={},
            local_dict={"col": values, "bound": bound},
        )
        for bound in distinct
    ]
    return masks, position.ravel().tolist()


def find_rectangle_intersections(
    df: pd.DataFrame,
    columns: tuple | list = ("start_x", "start_y", "end_x", "end_y"),
    new_column: str | int | float = "aa_intersecting",
    dtype: np.float32 | np.float64 | np.int32 | np.int64 = np.int32,
    convert_to_tuples: bool = False,
) -> pd.DataFrame:
    """
    Find, for every rectangle in a DataFrame, the rectangles that intersect it.

    Two rectangles are treated as disjoint only when one lies strictly to the
    left of, right of, above or below the other; rectangles that merely share
    an edge or a corner therefore count as intersecting, and every rectangle
    is listed among its own intersections.

    The coordinates are cast to ``dtype`` first, and both the comparisons and
    the returned coordinates use the cast values. One boolean mask of length
    ``len(df)`` is kept per distinct coordinate value while the function runs,
    so memory use grows with the number of distinct values.

    Args:
        df (pd.DataFrame): Input DataFrame containing rectangle coordinates.
            It is modified in place: ``new_column`` is added or overwritten.
        columns (tuple or list, optional): Names of the coordinate columns in
            the order (start_x, start_y, end_x, end_y).
            Defaults to ("start_x", "start_y", "end_x", "end_y").
        new_column (str, int, float, optional): Name of the column that
            receives the results. Defaults to "aa_intersecting".
        dtype (np.float32 | np.float64 | np.int32 | np.int64, optional): Data
            type used for the calculations. Defaults to np.int32.
        convert_to_tuples (bool, optional): If True, each result is a tuple of
            coordinate tuples; otherwise it is a 2-D array with one row per
            intersecting rectangle. Defaults to False.

    Returns:
        pd.DataFrame: The same ``df`` object with ``new_column`` populated.

    Example:
        import time
        import pandas as pd
        import numpy as np

        min_x = 1
        max_x = 100
        min_y = 1
        max_y = 100
        size = 50000
        min_width = 1
        max_width = 1000
        min_height = 1
        max_height = 1000
        df = pd.DataFrame(
            [
                (startx := np.random.randint(min_x, max_x, size=size)),
                (starty := np.random.randint(min_y, max_y, size=size)),
                startx + np.random.randint(min_width, max_width, size=size),
                starty + np.random.randint(min_height, max_height, size=size),
            ]
        ).T.rename(columns={0: "start_x", 1: "start_y", 2: "end_x", 3: "end_y"})
        start = time.perf_counter()
        df = find_rectangle_intersections(
            df,
            columns=("start_x", "start_y", "end_x", "end_y"),
            new_column="aa_intersecting",
            dtype=np.int32,
            convert_to_tuples=False,
        )
        print(time.perf_counter() - start)
    """
    coords = df[[*columns]].astype(dtype).to_numpy()
    count = len(coords)

    if convert_to_tuples:
        candidates = np.fromiter(map(tuple, coords), dtype="object", count=count)
    else:
        candidates = coords

    start_x, start_y, end_x, end_y = (
        np.ascontiguousarray(coords[:, axis]) for axis in range(4)
    )

    # Each table answers "which rectangles are certainly disjoint from a
    # rectangle with this coordinate?" for one of the four sides.
    left_of, by_start_x = _threshold_masks(start_x, end_x, "<")
    right_of, by_end_x = _threshold_masks(end_x, start_x, ">")
    beyond_end_y, by_end_y = _threshold_masks(end_y, start_y, ">")
    before_start_y, by_start_y = _threshold_masks(start_y, end_y, "<")

    overlapping = np.empty(count, dtype=bool)  # scratch buffer reused per row
    results = np.empty(count, dtype=object)
    lookups = zip(by_start_x, by_end_x, by_end_y, by_start_y)
    for row, (ix0, ix1, iy1, iy0) in enumerate(lookups):
        # The OR of the four masks marks the disjoint rectangles, so the
        # intersecting ones are its negation.
        numexpr.evaluate(
            "~(y1 | y2 | y3 | y4)",
            global_dict={},
            local_dict={
                "y1": left_of[ix0],
                "y2": right_of[ix1],
                "y3": beyond_end_y[iy1],
                "y4": before_start_y[iy0],
            },
            out=overlapping,
            casting="no",
        )
        # Boolean indexing copies, so reusing the scratch buffer is safe.
        hits = candidates[overlapping]
        results[row] = tuple(hits) if convert_to_tuples else hits

    # Filling an object array element by element keeps pandas from trying to
    # interpret equally shaped per-row results as extra dimensions.
    df[new_column] = pd.Series(results, index=df.index, dtype=object)
    return df
`[rect]` instead. It seems to be what you want to do. – Winterfeed

`list_of_rectangles = [rect, rect2]`. Or `list_of_rectangles = list()`, `list_of_rectangles.append(rect)`, `list_of_rectangles.append(rect2)` on 3 separate lines. – Deoxyribose

`for a in listA: for b in listB: if a.overlap(b): ...` right? – Philharmonic

`Rectangle.__init__` still looks weird: `self.bottom = right`, `self.right = bottom`. With `left, right, bottom, top`, `rect2` has zero width (just saying), and the other rectangles have top < bottom. (If and when you edit this, have a look at Andrew Svetlov's edit to save vertical space, and consider following user7138814's suggestion.) – Djokjakarta