我有这个考拉数据框,它是其他两个数据框的合并。它有 4 列被重写为指定键上的组的最大值。如果另一列是否为空,还会得到一个值为 0 1 的新列。
t0 = time.time()
NauticalData = ShipDatekeyTs_Calendar.merge(
derfact_nautical_ts5, on=["ShipId", "DateKey", "ts180"], how="left"
)
NauticalData = NauticalData.assign(
SOG=(NauticalData.groupby(["key_x"], as_index=False)["SOG"].max())["SOG"],
latitude=(NauticalData.groupby(["key_x"], as_index=False)["longitude"].max())[
"longitude"
],
longitude=(NauticalData.groupby(["key_x"], as_index=False)["longitude"].max())[
"longitude"
],
Heading=(NauticalData.groupby(["key_x"], as_index=False)["Heading"].max())[
"Heading"
],
)
NauticalData = NauticalData.assign(
SOG_IsNull=np.where((NauticalData["SOG"].to_numpy()).isnull(), 1, 0)
)
t1 = time.time()
print(str(t1 - t0) + " CREATE TABLE #NauticalData")
但它给了我这个错误:
AnalysisException: Resolved attribute(s) SOG#34059,longitude#34109,longitude#34159,Heading#34209 missing from
__index_level_0__#33970L,ShipId#33937,DateKey#33938,ts180#33939,ts180_date#33940,
minTs180#33941,maxTs180#33942,key_x#33943,SOG#33944,latitude#33945,longitude#33946,
Heading#33947,EUPortDetails#33948,ts5_seconds#33949L,ts5_minute#33950L,ts180_str#33951,
key_y#33952,__natural_order__#33989L in operator !Project [__index_level_0__#33970L,
ShipId#33937, DateKey#33938, ts180#33939, ts180_date#33940, minTs180#33941, maxTs180#33942,
key_x#33943, SOG#34059 AS SOG#34226, longitude#34109 AS latitude#34228,
longitude#34159 AS longitude#34230, Heading#34209 AS Heading#34232,
EUPortDetails#33948, ts5_seconds#33949L, ts5_minute#33950L, ts180_str#33951, key_y#33952].
Attribute(s) with the same name appear in the operation: SOG,longitude,longitude,Heading.
Please check if the right attribute(s) are used.;
在这条线上:
NauticalData = NauticalData.assign(SOG_IsNull = np.where((NauticalData['SOG'].to_numpy()).isnull(), 1, 0))
或使用 NauticalData 作为 df 的所有其他行。偶数显示(NauticalData)