Skip to content

Commit

Permalink
Make EarthDataLab work for DimDataYAXArrays (#298)
Browse files Browse the repository at this point in the history
* First tests pass

* Access and analysis ok

* All tests pass

* All tests pass

* bump minimum julia version
  • Loading branch information
meggart authored Aug 1, 2023
1 parent b25ad6e commit 2f7ea94
Show file tree
Hide file tree
Showing 14 changed files with 92 additions and 136 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ jobs:
fail-fast: false
matrix:
version:
- '1.6'
- '1'
- '1.8'
os:
- ubuntu-latest
- macOS-latest
Expand Down
7 changes: 4 additions & 3 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
name = "EarthDataLab"
uuid = "359177bc-a543-11e8-11b7-bb015dba3358"
authors = ["Fabian Gans <fgans@bgc-jena.mpg.de>"]
version = "0.12.1"
version = "0.13.0"

[deps]
CFTime = "179af706-886a-5703-950a-314cd64e0468"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0"
DiskArrayTools = "fcd2136c-9f69-4db6-97e5-f31981721d63"
DiskArrays = "3c3547ce-8d99-4f5e-a174-61eb10b00ae3"
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
Expand All @@ -32,9 +33,9 @@ Polynomials = "1, 2.0, 3"
StatsBase = "0.32, 0.33, 0.34"
Tables = "0.2, 1.0"
WeightedOnlineStats = "0.5, 0.6"
YAXArrays = "0.4"
YAXArrays = "0.5"
Zarr = "0.9"
julia = "1.6"
julia = "1.8"

[extras]
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
Expand Down
11 changes: 8 additions & 3 deletions src/EarthDataLab.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,23 +12,28 @@ include("esdc.jl")
include("ESDLTools.jl")

using YAXArrays: @reexport
import DimensionalData as DD

@reexport using Dates: Date, DateTime
@reexport using YAXArrays: (..)
@reexport using YAXArrays: concatenatecubes, caxes,
subsetcube, readcubedata,renameaxis!, YAXArray
@reexport using YAXArrays: CubeAxis, RangeAxis, CategoricalAxis,
getAxis

@reexport using YAXArrays: mapCube, getAxis, InDims, OutDims, Dataset,
CubeTable, cubefittable, fittable, savecube, Cube, open_dataset #From DAT module
@reexport using .Proc: removeMSC, gapFillMSC,normalizeTS,
getMSC, filterTSFFT, getNpY,interpolatecube,
getMedSC, extractLonLats, cubefromshape,
getMedSC, cubefromshape,
gapfillpoly, spatialinterp #From Proc module # from ESDL Tools

@reexport using .ESDC: esdc, esdd

#For backwards compatibility:
RangeAxis(name,vals) = DD.Dim{Symbol(name)}(vals)
CategoricalAxis(name,vals) = DD.Dim{Symbol(name)}(vals)

export RangeAxis, CategoricalAxis

using NetCDF: NetCDF
using Zarr: Zarr

Expand Down
4 changes: 2 additions & 2 deletions src/MSC.jl
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ Returns the mean annual cycle from each time series.
"""
function getMSC(c;kwargs...)
N = getNpY(c)
outdims = OutDims(RangeAxis("MSC",DateTime(1900):Day(ceil(Int,366/N)):DateTime(1900,12,31,23,59,59)),
outdims = OutDims(DD.Dim{:MSC}(DateTime(1900):Day(ceil(Int,366/N)):DateTime(1900,12,31,23,59,59)),
outtype = mscouttype(eltype(c)))
indims = InDims("Time")
mapCube(getMSC,c,getNpY(c);indims=indims,outdims=outdims,kwargs...)
Expand Down Expand Up @@ -174,7 +174,7 @@ Returns the median annual cycle from each time series.
"""
function getMedSC(c;kwargs...)
N = getNpY(c)
outdims = OutDims(RangeAxis("MSC",DateTime(1900):Day(ceil(Int,366/N)):DateTime(1900,12,31,23,59,59)),
outdims = OutDims(DD.Dim{:MSC}(DateTime(1900):Day(ceil(Int,366/N)):DateTime(1900,12,31,23,59,59)),
outtype = mscouttype(eltype(c)))
indims = InDims("Time")
mapCube(getMedSC,c;indims=indims,outdims=outdims,kwargs...)
Expand Down
6 changes: 2 additions & 4 deletions src/Proc.jl
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
module Proc
using YAXArrays.Cubes: YAXArray, cubechunks, caxes
using YAXArrays.Cubes.Axes: getAxis, findAxis, CategoricalAxis, axVal2Index,
RangeAxis, get_bb, axisfrombb, CubeAxis, axname
using YAXArrays.DAT: mapCube, InDims, OutDims, NValid, AnyMissing
using YAXArrays.Datasets: getsavefolder, Cube
using YAXArrays: getAxis
using Dates: year, Date, DateTime
"""
getNpY(cube)
Expand All @@ -12,7 +11,7 @@ Get the number of time steps per year
"""
function getNpY(cube)
timax = getAxis("Time",cube)
years = year.(timax.values)
years = year.(timax.val)
years[end] > years[1] + 1 || error("Must have at least 3 years to estimate number of time steps per year")
return count(i -> i == years[1] + 1, years)
end
Expand All @@ -22,6 +21,5 @@ include("Stats.jl")
include("TSDecomposition.jl")
include("remap.jl")
include("Shapes.jl")
include("extractlonlats.jl")

end
32 changes: 0 additions & 32 deletions src/extractlonlats.jl

This file was deleted.

12 changes: 7 additions & 5 deletions src/remap.jl
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
using DiskArrayTools: InterpolatedDiskArray, DiskArrayTools
using DiskArrayTools.Interpolations: Linear, Flat, Constant, NoInterp
using DiskArrays: eachchunk, GridChunks, approx_chunksize, grid_offset
using YAXArrays: findAxis, getAxis
import DimensionalData as DD
"""
spatialinterp(c,newlons::AbstractRange,newlats::AbstractRange;order=Linear(),bc = Flat())
"""
function spatialinterp(c,newlons::AbstractRange,newlats::AbstractRange;order=Linear(),bc = Flat())
interpolatecube(c,Dict("Lon"=>newlons, "Lat"=>newlats), order = Dict("Lon"=>order,"Lat"=>order))
end
spatialinterp(c,newlons::CubeAxis,newlats::CubeAxis;kwargs...)=
spatialinterp(c,newlons.values,newlats.values;kwargs...)
spatialinterp(c,newlons::DD.Dim,newlats::DD.Dim;kwargs...)=
spatialinterp(c,newlons.val.data,newlats.val.data;kwargs...)


function getsteprat(newax::AbstractRange, oldax::AbstractRange)
Expand All @@ -30,7 +32,7 @@ end
chunkold = eachchunk(c.data)
axold = caxes(c)
axinds = map(i->findAxis(i,c),k)
steprats = map((inew,iold)->getsteprat(newaxdict[inew], axold[iold].values),k,axinds)
steprats = map((inew,iold)->getsteprat(newaxdict[inew], axold[iold].val.data),k,axinds)
newcs = ntuple(ndims(c)) do i
ii = findfirst(isequal(i),axinds)
round(Int,approx_chunksize(chunkold.chunks[i]) * (ii==nothing ? 1 : steprats[ii]))
Expand All @@ -54,7 +56,7 @@ function interpolatecube(c,
if ai === nothing
nothing,NoInterp(),nothing,nothing
else
oldvals = caxes(c)[i].values
oldvals = caxes(c)[i].val.data
newvals = newaxes[ii[ai][1]]
getinterpinds(oldvals, newvals),get(order,ii[ai][1],Constant()),get(bc,ii[ai][1],Flat()),newvals
end
Expand All @@ -68,7 +70,7 @@ function interpolatecube(c,
if val === nothing
ax
else
RangeAxis(axname(ax),val)
DD.rebuild(ax,val)
end
end
YAXArray(newax,ar,c.properties, cleaner = c.cleaner)
Expand Down
1 change: 1 addition & 0 deletions test/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0"
DiskArrayTools = "fcd2136c-9f69-4db6-97e5-f31981721d63"
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
EarthDataLab = "359177bc-a543-11e8-11b7-bb015dba3358"
Expand Down
98 changes: 44 additions & 54 deletions test/access.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,35 +3,35 @@ using Dates
c=Cube()

@testset "Access single variable" begin
d = subsetcube(c,variable="air_temperature_2m",lon=(10,11),lat=(51,50),
time=(Date("2002-01-01"),Date("2008-12-31")))

d.data.v.indices==(18:21, 17:20, 93:414)
@test d.axes[1].values==10.125:0.25:10.875
@test d.axes[2].values==50.875:-0.25:50.125
@test d.axes[1] == RangeAxis("lon", 10.125:0.25:10.875)
@test d.axes[2] == RangeAxis("lat", 50.875:-0.25:50.125)
d = c[variable=DD.At("air_temperature_2m"),lon=10..11,lat=50..51,
time=Date("2002-01-01")..Date("2008-12-31")]

d.data.v.indices==(18:21, 17:20, 93:414)
@test d.axes[1].val==10.125:0.25:10.875
@test d.axes[2].val==50.875:-0.25:50.125
@test d.axes[1] == Dim{:lon}(10.125:0.25:10.875)
@test d.axes[2] == Dim{:lat}(50.875:-0.25:50.125)
end



@testset "Access multiple variables" begin
d2 = subsetcube(c,variable=["air_temperature_2m","gross_primary_productivity"],lon=(10,11),lat=(50,51),
time=(Date("2002-01-01"),Date("2008-12-31")))

@test d2.axes[4].values == ["air_temperature_2m", "gross_primary_productivity"]
@test d2.data.arrays[1].v.indices ==(18:21, 17:20, 93:414)
@test d2.axes[1].values==10.125:0.25:10.875
@test d2.axes[2].values==50.875:-0.25:50.125
@test first(d2.axes[3].values) == Dates.Date(2002,1,5)
@test last(d2.axes[3].values) == Dates.Date(2008, 12, 30)
d2 = c[variable=DD.At(["air_temperature_2m","gross_primary_productivity"]),lon=10..11,lat=50..51,
time=Date("2002-01-01")..Date("2008-12-31")]

@test d2.axes[4].val == ["air_temperature_2m", "gross_primary_productivity"]
@test d2.data.arrays[1].v.indices ==(18:21, 17:20, 93:414)
@test d2.axes[1].val==10.125:0.25:10.875
@test d2.axes[2].val==50.875:-0.25:50.125
@test first(d2.axes[3].val) == Dates.Date(2002,1,5)
@test last(d2.axes[3].val) == Dates.Date(2008, 12, 30)
end

@testset "Test values in MemCube" begin
d = subsetcube(c,variable="air_temperature_2m",lon=(10,11),lat=(51,50),
time=(Date("2002-01-01"),Date("2008-12-31")))
d2 = subsetcube(c,variable=["air_temperature_2m","gross_primary_productivity"],lon=(10,11),lat=(50,51),
time=(Date("2002-01-01"),Date("2008-12-31")))
d = c[variable=DD.At("air_temperature_2m"),lon=10..11,lat=50..51,
time=Date("2002-01-01")..Date("2008-12-31")]
d2 = c[variable=DD.At(["air_temperature_2m","gross_primary_productivity"]),lon=10..11,lat=50..51,
time=Date("2002-01-01")..Date("2008-12-31")]
data1=readcubedata(d)
data2=readcubedata(d2)

Expand All @@ -40,52 +40,42 @@ data2=readcubedata(d2)


@test isapprox(data1.data[1,1,1:10],Float32[267.9917, 269.9631, 276.71036, 280.88998,
280.90665, 277.02243, 274.5466, 276.919, 279.96243, 279.42276])
280.90665, 277.02243, 274.5466, 276.919, 279.96243, 279.42276])

@test isapprox(data2.data[1,1,1:10,1],Float32[267.9917, 269.9631, 276.71036, 280.88998,
280.90665, 277.02243, 274.5466, 276.919, 279.96243, 279.42276])
280.90665, 277.02243, 274.5466, 276.919, 279.96243, 279.42276])

@test caxes(data1)[1:2]==CubeAxis[RangeAxis("lon",10.125:0.25:10.875),RangeAxis("lat",50.875:-0.25:50.125)]
@test caxes(data1)[1:2]==(Dim{:lon}(10.125:0.25:10.875),Dim{:lat}(50.875:-0.25:50.125))

tax = caxes(data1)[3]
@test YAXArrays.Cubes.Axes.axsym(tax)==:time
@test tax.values[1] == Date(2002,1,5)
@test tax.values[end] == Date(2008,12,30)
@test length(tax.values) == 7*46
@test DD.name(tax)==:Time
@test tax.val[1] == Date(2002,1,5)
@test tax.val[end] == Date(2008,12,30)
@test length(tax.val) == 7*46

@test caxes(data2)[[1,2,4]]==CubeAxis[RangeAxis("lon",10.125:0.25:10.875),RangeAxis("lat",50.875:-0.25:50.125),CategoricalAxis("Variable",["air_temperature_2m","gross_primary_productivity"])]
@test caxes(data2)[[1,2,4]]==(Dim{:lon}(10.125:0.25:10.875),Dim{:lat}(50.875:-0.25:50.125),Dim{:Variable}(["air_temperature_2m","gross_primary_productivity"]))

tax = caxes(data2)[3]
@test YAXArrays.Cubes.Axes.axsym(tax)==:time
@test tax.values[1] == Date(2002,1,5)
@test tax.values[end] == Date(2008,12,30)
@test length(tax.values) == 7*46
@test DD.name(tax)==:Time
@test tax.val[1] == Date(2002,1,5)
@test tax.val[end] == Date(2008,12,30)
@test length(tax.val) == 7*46

end

@testset "Coordinate extraction" begin
d = subsetcube(c,variable="air_temperature_2m",lon=(10,11),lat=(51,50),
time=(Date("2002-01-01"),Date("2008-12-31")))
data1=readcubedata(d)
# Test reading of coordinate list
ll=[10.1 50.2;10.5 51.1;10.8 51.1]
llcube = readcubedata(extractLonLats(data1,ll))
@test llcube.data[1,:]==data1.data[1,4,:]
@test llcube.data[2,:]==data1.data[3,1,:]
@test llcube.data[3,:]==data1.data[4,1,:]
end

@testset "Accessing regions" begin
#Test access datacube by region
d3 = subsetcube(c,variable="gross_primary_productivity",region="Austria",time=Date("2005-01-01"))
@test d3.axes==[RangeAxis("lon",9.625:0.25:14.875),RangeAxis("lat",48.875:-0.25:47.375)]
d3 = c[variable=DD.At("gross_primary_productivity"),region="Austria",time=DD.Near(DateTime("2005-01-01"))]
@test d3.axes==(Dim{:lon}(9.625:0.25:14.875),RangeAxis("lat",48.875:-0.25:47.375))

end

using DiskArrayTools: DiskArrayStack

@testset "Saving and loading cubes" begin
d = subsetcube(c,variable="air_temperature_2m",lon=(10,31),lat=(51,50),
time=(Date("2002-01-01"),Date("2008-12-31")))
d = c[variable=DD.At("air_temperature_2m"),lon=(10..31),lat=(50..51),
time=(Date("2002-01-01")..Date("2008-12-31"))]
data1=readcubedata(d)
#Test saving cubes
dire=tempname()
Expand All @@ -97,7 +87,7 @@ using DiskArrayTools: DiskArrayStack
@test data1.data==data3.data

# Test loadOrGenerate macro
d=subsetcube(c,time=Date(2001)..Date(2005),lon=(10,11),lat=(50,51),variable=["gross_primary_productivity","net_ecosystem_exchange"])
d=c[time=Date(2001)..Date(2005),lon=(10..11),lat=(50..51),variable=DD.At(["gross_primary_productivity","net_ecosystem_exchange"])]

danom = removeMSC(d)

Expand All @@ -108,7 +98,7 @@ using DiskArrayTools: DiskArrayStack
danom=readcubedata(danom)
danom2=readcubedata(Cube(zp))

@test danom.axes==danom2.axes
@test danom.axes[1:3]==danom2.axes[1:3]
@test all(map(isequal,danom.data,danom2.data))

ncf = string(tempname(),".nc")
Expand All @@ -118,8 +108,8 @@ using DiskArrayTools: DiskArrayStack
#Test exportcube
@test ncread(ncf,"lon") == 10.125:0.25:10.875
@test ncread(ncf,"lat") == 50.875:-0.25:50.125
@test ncgetatt(ncf,"time","units") == "days since 1980-01-01"
@test getAxis("time",danom).values .- DateTime(1980) == Millisecond.(Day.(ncread(ncf,"time")))
@test ncgetatt(ncf,"Time","units") == "days since 1980-01-01"
@test getAxis("Time",danom).val .- DateTime(1980) == Millisecond.(Day.(ncread(ncf,"Time")))

anc = replace(ncread(ncf,"gross_primary_productivity")[:,:,:],-9999.0=>missing)
@test all(isequal.(anc, danom.data[:,:,:,1]))
Expand All @@ -131,9 +121,9 @@ end

@testset "ESDC v3" begin
c = esdd(res="low")
d = c.gross_primary_productivity[time=Date(2005)][443:444,139:140]
d = c.gross_primary_productivity[time=DD.Near(DateTime(2005))].data[443:444,139:140]
d == Float32[3.1673577 3.7342484; 3.3267372 4.0305696]

c = esdd(res="tiny")
c.gross_primary_productivity[time=Date(2005)][44,14] == 2.3713999f0
c.gross_primary_productivity[time=DD.Near(DateTime(2005))].data[44,14] == 2.3713999f0
end
Loading

2 comments on commit 2f7ea94

@meggart
Copy link
Member Author

@meggart meggart commented on 2f7ea94 Aug 1, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/88802

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.13.0 -m "<description of version>" 2f7ea94bb8b27211ff38303ce60dd4766d6b1b07
git push origin v0.13.0

Please sign in to comment.