diff --git a/CHANGELOG.md b/CHANGELOG.md index d9553c42..83dc6c16 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,18 @@ # CHANGELOG +0.1.22 (2025-01-12) +=================== +This release adds support for Android. This support means that Jiff will +automatically read its special concatenated time zone database, and will +read the `persist.sys.timezone` property to determine the system's current +time zone. + +See [PLATFORM] for more specific information about Android support. + +* [#140](https://github.com/BurntSushi/jiff/issues/140): +Add support for the Android platform. + + 0.1.21 (2025-01-04) =================== This release includes a new API for setting the unit designator label in a diff --git a/Cargo.toml b/Cargo.toml index f4260f26..f8123812 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,10 +13,13 @@ This library is heavily inspired by the Temporal project. categories = ["date-and-time", "no-std"] keywords = ["date", "time", "calendar", "zone", "duration"] edition = "2021" -exclude = ["/.github", "/tmp"] autotests = false autoexamples = false rust-version = "1.70" +# We include `/tests/lib.rs` to squash a `cargo package` warning that the +# `integration` test target is being ignored. We don't include anything else +# so tests obviously won't work, but it makes `cargo package` quiet. 
+include = ["/src/**/*.rs", "/tests/lib.rs", "/*.md"] [workspace] members = [ @@ -29,7 +32,13 @@ members = [ # Features are documented in the "Crate features" section of the crate docs: # https://docs.rs/jiff/*/#crate-features [features] -default = ["std", "tz-system", "tzdb-bundle-platform", "tzdb-zoneinfo"] +default = [ + "std", + "tz-system", + "tzdb-bundle-platform", + "tzdb-zoneinfo", + "tzdb-concatenated", +] std = ["alloc", "log?/std", "serde?/std"] alloc = ["serde?/alloc", "portable-atomic-util/alloc"] serde = ["dep:serde"] @@ -54,6 +63,15 @@ tzdb-bundle-always = ["dep:jiff-tzdb", "alloc"] # database that is typically found at /usr/share/zoneinfo on macOS and Linux. tzdb-zoneinfo = ["std"] +# This enables the system concatenated time zone database. On some platforms, +# like Android, this is the standard time zone database instead of the more +# widespread `zoneinfo` directory created by `zic` itself. +# +# This being enabled just means that some standard paths will be searched +# for the concatenated database and it will be used if the standard zoneinfo +# directory couldn't be found. +tzdb-concatenated = ["std"] + # This enables bindings to web browser APIs for retrieving the current time # and configured time zone. This ONLY applies on wasm32-unknown-unknown and # wasm64-unknown-unknown targets. Specifically, *not* on wasm32-wasi or diff --git a/PLATFORM.md b/PLATFORM.md index f4bf9cf8..70d5b6b8 100644 --- a/PLATFORM.md +++ b/PLATFORM.md @@ -45,13 +45,16 @@ to losslessly roundtrip datetimes via an interchange format specified by ## Environment variables -Jiff reads exactly two environment variables. These variables are read on -all platforms that support environment variables. So for example, Jiff -will respect `TZ` on Windows. Note though that some environments, like +Jiff generally only reads two environment variables. These variables are +read on all platforms that support environment variables. So for example, +Jiff will respect `TZ` on Windows. 
Note though that some environments, like `wasm32-wasip1` or `wasm32-unknown-emscripten`, are sandboxed by default. A sandboxed environment typically makes reading environment variables set outside -the sandbox impossible (or require opt-in support, such as [wasmtime]'s `-S -inherit-env` or `--env` flags). +the sandbox impossible (or require opt-in support, such as [wasmtime]'s +`-S inherit-env` or `--env` flags). + +Jiff may read additional environment variables for platform specific +integration. ### `TZDIR` @@ -96,6 +99,22 @@ for an [existing issue for your platform][issue-platform], and if one doesn't exist, please [file a new issue][issue-new]. Otherwise, setting `TZ` should be considered as a work-around. +### `ANDROID_ROOT` and `ANDROID_DATA` + +These environment variables are read to help determine the location of +Android's [Concatenated Time Zone Database]. If `ANDROID_ROOT` is not defined, +then Jiff uses `/system` as its default value. If `ANDROID_DATA` is not +defined, then Jiff uses `/data/misc` as its default value. + +Note that these environment variables are not necessarily only read on +Android, although they likely only make sense in the context of an Android +environment. This is because Jiff's support for the Concatenated Time +Zone Database is platform independent. For example, Jiff will let users +create a database from a Concatenated Time Zone Database file via the +`TimeZoneDatabase::from_concatenated_path` API on _any_ platform. This is +intended to enable maximum flexibility, and because there is no specific +reason to make the Concatenated Time Zone Database format Android-specific. + ## Platforms This section lists the platforms that Jiff has explicit support for. Support @@ -126,14 +145,14 @@ detect. If your Unix system uses a different directory, you may try to submit a PR adding support for it in Jiff proper, or just set the `TZDIR` environment variable. 
-The existence of `/usr/share/zoneinfo` is not guaranteed in all Unix environments. -For example, stripped down Docker containers might omit a full copy of the -time zone database. Jiff will still work in such environments, but all IANA -time zone identifier lookups will fail. To fix this, you can either install the -IANA Time Zone Database into your environment, or you can enable the Jiff -crate feature `tzdb-bundle-always`. This compile time setting will cause Jiff -to depend on `jiff-tzdb`, which includes a complete copy of the IANA Time Zone -Database embedded into the compiled artifact. +The existence of `/usr/share/zoneinfo` is not guaranteed in all Unix +environments. For example, stripped down Docker containers might omit a full +copy of the time zone database. Jiff will still work in such environments, but +all IANA time zone identifier lookups will fail. To fix this, you can either +install the IANA Time Zone Database into your environment, or you can enable +the Jiff crate feature `tzdb-bundle-always`. This compile time setting will +cause Jiff to depend on `jiff-tzdb`, which includes a complete copy of the IANA +Time Zone Database embedded into the compiled artifact. Bundling the IANA Time Zone Database should only be done as a last resort. Especially on Unix systems, it is greatly preferred to use the system copy of @@ -182,6 +201,84 @@ different way to configure the system time zone, please check [available platform issues][issue-platform] for a related issue. If one doesn't exist, please [create a new issue][issue-new].) +### Android + +#### Current time + +All Android platforms should be supported in terms of getting the current time. +This support comes from Rust's standard library. + +#### IANA Time Zone Database + +Unlike effectively every other Unix system, Android has its own special time +zone database format. 
While it still makes use of TZif formatted data for +defining time zone transitions themselves, it does not use the `zoneinfo` +directory format (where there is one file per time zone). Instead, it +_concatenates_ all time zone files into one single file. This is combined with +some metadata that makes it quick to search for time zones by their IANA time +zone identifier. + +This format is technically unnamed, but Jiff refers to it as the [Concatenated +Time Zone Database] format. It has no formal specification. Jiff's +implementation was done by inferring the format implemented by the Android +Platform and also the implementation in [Go's standard library]. In practice +this tends to work well, although there are obviously no guarantees. This is +a practical trade-off given that there doesn't appear to be any obvious +alternative. Moreover, others (such as Go, a project maintained by the same +company that maintains Android) are already doing it, so it seems likely that +if Android decides to make breaking changes to the format, they'll need to +version it in some way to avoid breaking the ecosystem. + +Note that Jiff supports reading this format on all platforms, not just Android. +For example, Jiff users can use the `TimeZoneDatabase::from_concatenated_path` +API to create a `TimeZoneDatabase` from a concatenated `tzdata` file on any +platform. + +If users of Jiff are uncomfortable relying on Android's "unstable" time zone +database format, then there are three options available to them after disabling +the `tzdb-concatenated` crate feature: + +* They can own the responsibility of putting a standard `zoneinfo` database +installation into their environment. Then set the `TZDIR` environment variable +to point at it, and Jiff will automatically use it. +* Enable the `tzdb-bundle-always` crate feature. This will cause the entire time zone +database to be compiled into your binary. Nothing else needs to be done. Jiff +will automatically use the bundled copy. 
+* Manually create `TimeZone` values via `TimeZone::tzif` from TZif formatted +data. With this approach, you may need to change how you use Jiff in some +cases. For example, any `intz` method will need to be changed to use the +`to_zoned` equivalent. + +#### System time zone + +The system time zone on Android is discovered by reading the +`persist.sys.timezone` property. + +Note that in addition to Android developers citing the [Concatenated Time Zone +Database] format as unstable, they also discourage the discovery of the system +time zone through properties as well. (See [chrono#1018] and [chrono#1148] +for some discussion on this topic.) For Jiff at least, there is no feasible +alternative. Apparently, the blessed API is to use their Java libraries, but +that doesn't seem feasible to Jiff since I (Jiff's author) am unaware of a +mechanism for easily calling Java code from Rust. The only option left is to +use their `libc` APIs, which they did at least improve to make them thread +safe, but this isn't enough for Jiff. For Jiff, we really want the actual IANA +time zone identifier, and it isn't clear how to discover this from their `libc` +APIs. Moreover, Jiff supports far more sophisticated operations on a time zone +(like dealing with discontinuities in civil time) that cannot be implemented on +top of `libc`-style APIs. Using Android's `libc` APIs for time handling would +be a huge regression compared to all other platforms. + +It's worth noting that all other popular Unix systems provide at least some +reliable means of both querying the time zone database _and_ discovering the +system-wide IANA time zone identifier. Why Android is incapable of following +the existing conventions for Unix systems is unclear. + +If users of Jiff are uncomfortable relying on Android's `persist.sys.timezone` +property, then they should avoid APIs like `Zoned::now` and `TimeZone::system`. 
+Instead, they can use `TimeZone::UTC`, which is what the fallback time zone +would be when the system time zone cannot be discovered. + ### Windows #### Current time @@ -317,3 +414,7 @@ the time zone in Jiff's configured IANA Time Zone Database. [CLDR XML data]: https://github.com/unicode-org/cldr/raw/main/common/supplemental/windowsZones.xml [`Intl.DateTimeFormat`]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/DateTimeFormat/resolvedOptions#timezone [`Date.now`]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Date/now +[Concatenated Time Zone Database]: https://android.googlesource.com/platform/libcore/+/jb-mr2-release/luni/src/main/java/libcore/util/ZoneInfoDB.java +[Go's standard library]: https://github.com/golang/go/blob/19e923182e590ae6568c2c714f20f32512aeb3e3/src/time/zoneinfo_android.go +[chrono#1018]: https://github.com/chronotope/chrono/pull/1018 +[chrono#1148]: https://github.com/chronotope/chrono/pull/1148 diff --git a/jiff-tzdb-platform/Cargo.toml b/jiff-tzdb-platform/Cargo.toml index bc0bbb3d..99e766bd 100644 --- a/jiff-tzdb-platform/Cargo.toml +++ b/jiff-tzdb-platform/Cargo.toml @@ -14,6 +14,7 @@ keywords = ["date", "time", "temporal", "zone", "iana"] workspace = ".." edition = "2021" rust-version = "1.70" +include = ["/*.rs"] [lib] name = "jiff_tzdb_platform" diff --git a/jiff-tzdb/Cargo.toml b/jiff-tzdb/Cargo.toml index ee2fa0b9..dde2e1d9 100644 --- a/jiff-tzdb/Cargo.toml +++ b/jiff-tzdb/Cargo.toml @@ -12,6 +12,7 @@ keywords = ["date", "time", "temporal", "zone", "iana"] workspace = ".." 
edition = "2021" rust-version = "1.70" +include = ["/*.rs", "/*.dat"] [lib] name = "jiff_tzdb" diff --git a/scripts/jiff-debug b/scripts/jiff-debug index eea6e4e0..a2c809fb 100755 --- a/scripts/jiff-debug +++ b/scripts/jiff-debug @@ -43,13 +43,22 @@ case "$1" in tz::tzif::tests::debug_tzif -- --nocapture \ 2>&1 > /dev/null ;; + tzdata-list) + if [ -z "$2" ]; then + echo "Usage: $(basename "$0") tzdata-list " >&2 + exit 1 + fi + JIFF_DEBUG_CONCATENATED_TZDATA="$2" $cargo_test --quiet --lib --features logging \ + tz::db::concatenated::inner::tests::debug_tzdata_list -- --nocapture \ + 2>&1 > /dev/null + ;; zoneinfo-walk) if [ -z "$2" ]; then echo "Usage: $(basename "$0") zoneinfo-walk " >&2 exit 1 fi JIFF_DEBUG_ZONEINFO_DIR="$2" $cargo_test --quiet --lib --features logging \ - tz::db::zoneinfo::tests::debug_zoneinfo_walk -- --nocapture \ + tz::db::zoneinfo::inner::tests::debug_zoneinfo_walk -- --nocapture \ 2>&1 > /dev/null ;; *) diff --git a/src/lib.rs b/src/lib.rs index 323e886a..f4d73c2f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -650,7 +650,15 @@ For more, see the [`fmt::serde`] sub-module. (This requires enabling Jiff's * **tzdb-zoneinfo** (enabled by default) - When enabled, Jiff will attempt to look for your system's copy of the Time Zone Database. - +* **tzdb-concatenated** (enabled by default) - + When enabled, Jiff will attempt to look for a system copy of the + [Concatenated Time Zone Database]. This is primarily meant for reading time + zone information on Android platforms. The `ANDROID_ROOT` and `ANDROID_DATA` + environment variables (with sensible default fallbacks) are used to construct + candidate paths to look for this database. For more on this, see the + [Android section of the platform support documentation](crate::_documentation::platform#android). 
+ +[Concatenated Time Zone Database]: https://android.googlesource.com/platform/libcore/+/jb-mr2-release/luni/src/main/java/libcore/util/ZoneInfoDB.java */ #![no_std] diff --git a/src/tz/concatenated.rs b/src/tz/concatenated.rs new file mode 100644 index 00000000..98d6ed5d --- /dev/null +++ b/src/tz/concatenated.rs @@ -0,0 +1,1138 @@ +use alloc::{ + string::{String, ToString}, + vec::Vec, +}; + +use crate::{ + error::{err, Error, ErrorContext}, + tz::TimeZone, + util::{array_str::ArrayStr, escape, utf8}, +}; + +/// An abstraction for reading data from Android's concatenated TZif data file. +/// +/// This abstraction is designed in a way that the data it reads from is +/// largely untrusted. This means that, no matter what sequence of bytes is +/// given, this should never panic (or else there is a bug). Moreover, there is +/// some guarding against disproportionate allocation. While big allocations +/// can still happen, they require a proportionally large data file. (Thus, +/// callers can guard against this by considering the size of the data.) What +/// this implementation protects against is accidentally OOMing or panicking as +/// a result of naively doing `Vec::with_capacity(rdr.decode_integer())`. +/// +/// This is also designed to work in alloc-only contexts mostly out of "good +/// sense." Technically we don't (currently) use this outside of `std`, since +/// it's only used for reading tzdb on Android from the file system. But we do +/// things this way in case we end up wanting to use it for something else. +/// If we needed this for no-alloc environments, then that's a much bigger +/// change, if only because it would require making the TZif parser no-alloc +/// compatible, and it's not quite clear what the best way to do that is. We +/// achieve the alloc-only API by introducing a trait that abstracts over a +/// `File` for random access to bytes. 
+#[derive(Debug)] +pub(crate) struct ConcatenatedTzif { + rdr: R, + header: Header, +} + +impl ConcatenatedTzif { + /// Open the concatenated TZif file using the reader given. + /// + /// This reads the header and will return an error if the header is + /// invalid. + pub(crate) fn open(rdr: R) -> Result, Error> { + let header = Header::read(&rdr)?; + Ok(ConcatenatedTzif { rdr, header }) + } + + /// Returns the version of this `tzdata` database. + pub(crate) fn version(&self) -> ArrayStr<5> { + self.header.version + } + + /// Returns a `TimeZone` extracted from this concatenated TZif data. + /// + /// This is only successful if an index entry with the corresponding + /// IANA time zone identifier could be found. + /// + /// Callers must provide two scratch buffers that are used for temporary + /// allocation internally. Callers can create a new buffer for each call, + /// but it's likely faster to reuse them if possible. + /// + /// If a `TimeZone` is returned, it is guaranteed to have a present IANA + /// name (accessible via `TimeZone::iana_name`). + pub(crate) fn get( + &self, + query: &str, + scratch1: &mut Vec, + scratch2: &mut Vec, + ) -> Result, Error> { + scratch1.clear(); + alloc(scratch1, self.header.index_len())?; + self.rdr + .read_exact_at(scratch1, self.header.index_offset) + .context("failed to read index block")?; + + let mut index = &**scratch1; + while !index.is_empty() { + let entry = IndexEntry::new(&index[..IndexEntry::LEN]); + index = &index[IndexEntry::LEN..]; + let ordering = utf8::cmp_ignore_ascii_case_bytes( + entry.name_bytes(), + query.as_bytes(), + ); + if ordering.is_ne() { + continue; + } + + // OK because `entry.name_bytes()` is equal to `query`, + // ignoring ASCII case. The only way this can be true is if + // `entry.name_bytes()` is itself valid UTF-8. 
+ let name = entry.name().unwrap(); + scratch2.clear(); + alloc(scratch2, entry.len())?; + let start = self.header.data_offset.saturating_add(entry.start()); + self.rdr + .read_exact_at(scratch2, start) + .context("failed to read TZif data block")?; + return TimeZone::tzif(name, scratch2).map(Some); + } + Ok(None) + } + + /// Returns a list of all IANA time zone identifiers in this concatenated + /// TZif data. + /// + /// Callers must provide a scratch buffer that is used for temporary + /// allocation internally. Callers can create a new buffer for each call, + /// but it's likely faster to reuse them if possible. + pub(crate) fn available( + &self, + scratch: &mut Vec, + ) -> Result, Error> { + scratch.clear(); + alloc(scratch, self.header.index_len())?; + self.rdr + .read_exact_at(scratch, self.header.index_offset) + .context("failed to read index block")?; + + let names_len = self.header.index_len() / IndexEntry::LEN; + // Why are we careless with this alloc? Well, its size is proportional + // to the actual amount of data in the file. So the only way to get a + // big alloc is to create a huge file. This seems... fine... I guess. + // Whereas the `alloc` above is done on the basis of an arbitrary + // 32-bit integer. + let mut names = Vec::with_capacity(names_len); + let mut index = &**scratch; + while !index.is_empty() { + let entry = IndexEntry::new(&index[..IndexEntry::LEN]); + index = &index[IndexEntry::LEN..]; + names.push(entry.name()?.to_string()); + } + Ok(names) + } +} + +/// The header of Android concatenated TZif data. +/// +/// The header has the version and some offsets indicating the location of +/// the index entry (a list of IANA time zone identifiers and offsets into +/// the data block) and the actual TZif data. +#[derive(Debug)] +struct Header { + version: ArrayStr<5>, + index_offset: u64, + data_offset: u64, +} + +impl Header { + /// Reads the header from Android's concatenated TZif data + /// file. 
+ /// + /// Basically, this gives us the version and some offsets for where to find + /// data. + fn read(rdr: &R) -> Result { + // 12 bytes plus 3 4-byte big endian integers. + let mut buf = [0; 12 + 3 * 4]; + rdr.read_exact_at(&mut buf, 0) + .context("failed to read concatenated TZif header")?; + if &buf[..6] != b"tzdata" { + return Err(err!( + "expected first 6 bytes of concatenated TZif header \ + to be `tzdata`, but found `{found}`", + found = escape::Bytes(&buf[..6]), + )); + } + if buf[11] != 0 { + return Err(err!( + "expected last byte of concatenated TZif header \ + to be NUL, but found `{found}`", + found = escape::Bytes(&buf[..12]), + )); + } + + let version = { + let version = core::str::from_utf8(&buf[6..11]).map_err(|_| { + err!( + "expected version in concatenated TZif header to \ + be valid UTF-8, but found `{found}`", + found = escape::Bytes(&buf[6..11]), + ) + })?; + // OK because `version` is exactly 5 bytes, by construction. + ArrayStr::new(version).unwrap() + }; + // OK because the sub-slice is sized to exactly 4 bytes. + let index_offset = u64::from(read_be32(&buf[12..16])); + // OK because the sub-slice is sized to exactly 4 bytes. + let data_offset = u64::from(read_be32(&buf[16..20])); + if index_offset > data_offset { + return Err(err!( + "invalid index ({index_offset}) and data ({data_offset}) \ + offsets, expected index offset to be less than or equal \ + to data offset", + )); + } + // we don't read 20..24 since we don't care about zonetab (yet) + let header = Header { version, index_offset, data_offset }; + if header.index_len() % IndexEntry::LEN != 0 { + return Err(err!( + "length of index block is not a multiple {len}", + len = IndexEntry::LEN, + )); + } + Ok(header) + } + + /// Returns the length of the index section of the concatenated tzdb. + /// + /// Beware of using this to create allocations. In theory, this should be + /// trusted data, but the length can be any 32-bit integer. 
If it's used to + /// create an allocation, it could potentially be up to 4GB. + fn index_len(&self) -> usize { + // OK because `Header` parsing returns an error if this overflows. + let len = self.data_offset.checked_sub(self.index_offset).unwrap(); + // N.B. Overflow only occurs here on 16-bit (or smaller) platforms, + // which at the time of writing, is not supported by Jiff. Instead, + // a `usize::MAX` will trigger an allocation error. + usize::try_from(len).unwrap_or(usize::MAX) + } +} + +/// A view into a single index entry in the index block of concatenated TZif +/// data. +/// +/// If we had safe transmute, it would be much nicer to define this as +/// +/// ```text +/// #[derive(Clone, Copy)] +/// #[repr(transparent, align(1))] +/// struct IndexEntry { +/// name: [u8; 40], +/// start: u32, +/// len: u32, +/// _raw_utc_offset: u32, // we don't use this here +/// } +/// ``` +/// +/// And probably implement a trait asserting that this is plain old data (or +/// derive it safely). And then we could cast `&[u8]` to `&[IndexEntry]` +/// safely and access the individual fields as is. We could do this today, +/// but not in safe code. And since this isn't performance critical, it's just +/// not worth flagging this code as potentially containing undefined behavior. +#[derive(Clone, Copy)] +struct IndexEntry<'a>(&'a [u8]); + +impl<'a> IndexEntry<'a> { + /// The length of an index entry. It's fixed size. 40 bytes for the IANA + /// time zone identifier. 4 bytes for each of 3 big-endian integers. The + /// first is the start of the corresponding TZif data within the data + /// block. The second is the length of said TZif data. And the third is + /// the "raw UTC offset" of the time zone. (I'm unclear on the semantics + /// of this third, since some time zones have more than one because of + /// DST. And of course, it can change over time. Since I don't know what + /// Android uses this for, I'm not sure how I'm supposed to interpret it.) 
+ const LEN: usize = 40 + 3 * 4; + + /// Creates a new view into an entry in the concatenated TZif index. + /// + /// # Panics + /// + /// When `slice` does not have the expected length (`IndexEntry::LEN`). + fn new(slice: &'a [u8]) -> IndexEntry<'a> { + assert_eq!(slice.len(), IndexEntry::LEN, "invalid index entry length"); + IndexEntry(slice) + } + + /// Like `name_bytes`, but as a `&str`. + /// + /// This returns an error if the name isn't valid UTF-8. + fn name(&self) -> Result<&str, Error> { + core::str::from_utf8(self.name_bytes()).map_err(|_| { + err!( + "IANA time zone identifier `{name}` is not valid UTF-8", + name = escape::Bytes(self.name_bytes()), + ) + }) + } + + /// Returns the IANA time zone identifier as a byte slice. + /// + /// In theory, an empty slice could be returned. But if that happens, + /// then there is probably a bug in this code somewhere, the format + /// changed or the source data is corrupt somehow. + fn name_bytes(&self) -> &'a [u8] { + let mut block = &self.0[..40]; + while block.last().copied() == Some(0) { + block = &block[..block.len() - 1]; + } + block + } + + /// Returns the starting offset (relative to the beginning of the TZif + /// data block) of the corresponding TZif data. + fn start(&self) -> u64 { + u64::from(read_be32(&self.0[40..44])) + } + + /// Returns the length of the TZif data block. + /// + /// Beware of using this to create allocations. In theory, this should be + /// trusted data, but the length can be any 32-bit integer. If it's used to + /// create an allocation, it could potentially be up to 4GB. + fn len(&self) -> usize { + // N.B. Overflow only occurs here on 16-bit (or smaller) platforms, + // which at the time of writing, is not supported by Jiff. Instead, + // a `usize::MAX` will trigger an allocation error. 
+ usize::try_from(read_be32(&self.0[44..48])).unwrap_or(usize::MAX) + } +} + +impl<'a> core::fmt::Debug for IndexEntry<'a> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + f.debug_struct("IndexEntry") + .field("name", &escape::Bytes(self.name_bytes())) + .field("start", &self.start()) + .field("len", &self.len()) + .finish() + } +} + +/// A crate-internal trait defining the source of concatenated TZif data. +/// +/// Basically, this just provides a way to read a fixed amount of data at a +/// particular offset. This is obviously trivial to implement on `&[u8]` (and +/// indeed, we do so for testing), but we use it to abstract over platform +/// differences when reading from a `File`. +/// +/// The intent is that on Unix, this will use `pread`, which avoids a file +/// seek followed by a `read` call. +pub(crate) trait Read { + fn read_exact_at(&self, buf: &mut [u8], offset: u64) -> Result<(), Error>; +} + +impl<'a, R: Read + ?Sized> Read for &'a R { + fn read_exact_at(&self, buf: &mut [u8], offset: u64) -> Result<(), Error> { + (**self).read_exact_at(buf, offset) + } +} + +/// Reads a 32-bit big endian encoded integer from `bytes`. +/// +/// # Panics +/// +/// If `bytes.len() != 4`. +fn read_be32(bytes: &[u8]) -> u32 { + u32::from_be_bytes(bytes.try_into().expect("slice of length 4")) +} + +#[cfg(test)] +impl Read for [u8] { + fn read_exact_at(&self, buf: &mut [u8], offset: u64) -> Result<(), Error> { + let offset = usize::try_from(offset) + .map_err(|_| err!("offset `{offset}` overflowed `usize`"))?; + let Some(slice) = self.get(offset..) 
else { + return Err(err!( + "given offset `{offset}` is not valid \ + (only {len} bytes are available)", + len = self.len(), + )); + }; + if buf.len() > slice.len() { + return Err(err!( + "unexpected EOF, expected {len} bytes but only have {have}", + len = buf.len(), + have = slice.len() + )); + } + buf.copy_from_slice(&slice[..buf.len()]); + Ok(()) + } +} + +#[cfg(all(feature = "std", unix))] +impl Read for std::fs::File { + fn read_exact_at(&self, buf: &mut [u8], offset: u64) -> Result<(), Error> { + use std::os::unix::fs::FileExt; + FileExt::read_exact_at(self, buf, offset).map_err(Error::io) + } +} + +#[cfg(all(feature = "std", windows))] +impl Read for std::fs::File { + fn read_exact_at( + &self, + mut buf: &mut [u8], + mut offset: u64, + ) -> Result<(), Error> { + use std::{io, os::windows::fs::FileExt}; + + while !buf.is_empty() { + match self.seek_read(buf, offset) { + Ok(0) => break, + Ok(n) => { + buf = &mut buf[n..]; + offset = u64::try_from(n) + .ok() + .and_then(|n| n.checked_add(offset)) + .ok_or_else(|| { + err!("offset overflow when reading from `File`") + })?; + } + Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {} + Err(e) => return Err(Error::io(e)), + } + } + if !buf.is_empty() { + Err(Error::io(io::Error::new( + io::ErrorKind::UnexpectedEof, + "failed to fill whole buffer", + ))) + } else { + Ok(()) + } + } +} + +#[cfg(all(feature = "std", all(not(unix), not(windows))))] +impl Read for std::fs::File { + fn read_exact_at(&self, buf: &mut [u8], offset: u64) -> Result<(), Error> { + use std::io::{Read as _, Seek as _, SeekFrom}; + let mut file = self; + file.seek(SeekFrom::Start(offset)).map_err(Error::io).with_context( + || err!("failed to seek to offset {offset} in `File`"), + )?; + file.read_exact(buf).map_err(Error::io) + } +} + +/// Allocates `additional` extra bytes on the `Vec` given and set them to `0`. 
+/// +/// This specifically will never do an "OOM panic" and will instead return an +/// error (courtesy of `Vec::try_reserve_exact`). It will also return an error +/// without even trying the allocation if it's deemed to be "too big." +/// +/// This is used so that we are extra careful about creating allocations based +/// on integers parsed from concatenated TZif data. Generally speaking, the +/// data we parse should be "trusted" (since it's probably not writable by +/// anyone other than `root`), but who knows where this code will ultimately be +/// used. So we try pretty hard to avoid panicking (even for OOM). +/// +/// To be clear, we probably could panic on the error path. The goal here +/// isn't to avoid OOM because you can't allocate 10 bytes---Jiff isn't robust +/// enough in that kind of environment by far. The goal is to avoid OOM for +/// exorbitantly large allocations through some kind of attack vector. +fn alloc(bytes: &mut Vec, additional: usize) -> Result<(), Error> { + // At time of writing, the biggest TZif data file is a few KB. And the + // index block is tens of KB. So impose a limit that is a couple of orders + // of magnitude bigger, but still overall pretty small for... some systems. + // Anyway, I welcome improvements to this heuristic! + const LIMIT: usize = 10 * 1 << 20; + + if additional > LIMIT { + return Err(err!( + "attempted to allocate more than {LIMIT} bytes \ + while reading concatenated TZif data, which \ + exceeds a heuristic limit to prevent huge allocations \ + (please file a bug if this error is inappropriate)", + )); + } + bytes.try_reserve_exact(additional).map_err(|_| { + err!( + "failed to allocation {additional} bytes \ + for reading concatenated TZif data" + ) + })?; + // This... can't actually happen right? 
+ let new_len = bytes + .len() + .checked_add(additional) + .ok_or_else(|| err!("total allocation length overflowed `usize`"))?; + bytes.resize(new_len, 0); + Ok(()) +} + +#[cfg(test)] +mod tests { + use crate::{ + civil::date, + tz::{ + offset, testdata::ANDROID_CONCATENATED_TZIF, AmbiguousOffset, + Offset, + }, + Timestamp, + }; + + use super::*; + + fn unambiguous(offset_hours: i8) -> AmbiguousOffset { + let offset = offset(offset_hours); + o_unambiguous(offset) + } + + fn gap( + earlier_offset_hours: i8, + later_offset_hours: i8, + ) -> AmbiguousOffset { + let earlier = offset(earlier_offset_hours); + let later = offset(later_offset_hours); + o_gap(earlier, later) + } + + fn fold( + earlier_offset_hours: i8, + later_offset_hours: i8, + ) -> AmbiguousOffset { + let earlier = offset(earlier_offset_hours); + let later = offset(later_offset_hours); + o_fold(earlier, later) + } + + fn o_unambiguous(offset: Offset) -> AmbiguousOffset { + AmbiguousOffset::Unambiguous { offset } + } + + fn o_gap(earlier: Offset, later: Offset) -> AmbiguousOffset { + AmbiguousOffset::Gap { before: earlier, after: later } + } + + fn o_fold(earlier: Offset, later: Offset) -> AmbiguousOffset { + AmbiguousOffset::Fold { before: earlier, after: later } + } + + // Copied from src/tz/mod.rs. 
+ #[test] + fn time_zone_tzif_to_ambiguous_timestamp() { + let tests: &[(&str, &[_])] = &[ + ( + "America/New_York", + &[ + ((1969, 12, 31, 19, 0, 0, 0), unambiguous(-5)), + ((2024, 3, 10, 1, 59, 59, 999_999_999), unambiguous(-5)), + ((2024, 3, 10, 2, 0, 0, 0), gap(-5, -4)), + ((2024, 3, 10, 2, 59, 59, 999_999_999), gap(-5, -4)), + ((2024, 3, 10, 3, 0, 0, 0), unambiguous(-4)), + ((2024, 11, 3, 0, 59, 59, 999_999_999), unambiguous(-4)), + ((2024, 11, 3, 1, 0, 0, 0), fold(-4, -5)), + ((2024, 11, 3, 1, 59, 59, 999_999_999), fold(-4, -5)), + ((2024, 11, 3, 2, 0, 0, 0), unambiguous(-5)), + ], + ), + ( + "Europe/Dublin", + &[ + ((1970, 1, 1, 0, 0, 0, 0), unambiguous(1)), + ((2024, 3, 31, 0, 59, 59, 999_999_999), unambiguous(0)), + ((2024, 3, 31, 1, 0, 0, 0), gap(0, 1)), + ((2024, 3, 31, 1, 59, 59, 999_999_999), gap(0, 1)), + ((2024, 3, 31, 2, 0, 0, 0), unambiguous(1)), + ((2024, 10, 27, 0, 59, 59, 999_999_999), unambiguous(1)), + ((2024, 10, 27, 1, 0, 0, 0), fold(1, 0)), + ((2024, 10, 27, 1, 59, 59, 999_999_999), fold(1, 0)), + ((2024, 10, 27, 2, 0, 0, 0), unambiguous(0)), + ], + ), + ( + "Australia/Tasmania", + &[ + ((1970, 1, 1, 11, 0, 0, 0), unambiguous(11)), + ((2024, 4, 7, 1, 59, 59, 999_999_999), unambiguous(11)), + ((2024, 4, 7, 2, 0, 0, 0), fold(11, 10)), + ((2024, 4, 7, 2, 59, 59, 999_999_999), fold(11, 10)), + ((2024, 4, 7, 3, 0, 0, 0), unambiguous(10)), + ((2024, 10, 6, 1, 59, 59, 999_999_999), unambiguous(10)), + ((2024, 10, 6, 2, 0, 0, 0), gap(10, 11)), + ((2024, 10, 6, 2, 59, 59, 999_999_999), gap(10, 11)), + ((2024, 10, 6, 3, 0, 0, 0), unambiguous(11)), + ], + ), + ( + "Antarctica/Troll", + &[ + ((1970, 1, 1, 0, 0, 0, 0), unambiguous(0)), + // test the gap + ((2024, 3, 31, 0, 59, 59, 999_999_999), unambiguous(0)), + ((2024, 3, 31, 1, 0, 0, 0), gap(0, 2)), + ((2024, 3, 31, 1, 59, 59, 999_999_999), gap(0, 2)), + // still in the gap! 
+ ((2024, 3, 31, 2, 0, 0, 0), gap(0, 2)), + ((2024, 3, 31, 2, 59, 59, 999_999_999), gap(0, 2)), + // finally out + ((2024, 3, 31, 3, 0, 0, 0), unambiguous(2)), + // test the fold + ((2024, 10, 27, 0, 59, 59, 999_999_999), unambiguous(2)), + ((2024, 10, 27, 1, 0, 0, 0), fold(2, 0)), + ((2024, 10, 27, 1, 59, 59, 999_999_999), fold(2, 0)), + // still in the fold! + ((2024, 10, 27, 2, 0, 0, 0), fold(2, 0)), + ((2024, 10, 27, 2, 59, 59, 999_999_999), fold(2, 0)), + // finally out + ((2024, 10, 27, 3, 0, 0, 0), unambiguous(0)), + ], + ), + ( + "America/St_Johns", + &[ + ( + (1969, 12, 31, 20, 30, 0, 0), + o_unambiguous(-Offset::hms(3, 30, 0)), + ), + ( + (2024, 3, 10, 1, 59, 59, 999_999_999), + o_unambiguous(-Offset::hms(3, 30, 0)), + ), + ( + (2024, 3, 10, 2, 0, 0, 0), + o_gap(-Offset::hms(3, 30, 0), -Offset::hms(2, 30, 0)), + ), + ( + (2024, 3, 10, 2, 59, 59, 999_999_999), + o_gap(-Offset::hms(3, 30, 0), -Offset::hms(2, 30, 0)), + ), + ( + (2024, 3, 10, 3, 0, 0, 0), + o_unambiguous(-Offset::hms(2, 30, 0)), + ), + ( + (2024, 11, 3, 0, 59, 59, 999_999_999), + o_unambiguous(-Offset::hms(2, 30, 0)), + ), + ( + (2024, 11, 3, 1, 0, 0, 0), + o_fold(-Offset::hms(2, 30, 0), -Offset::hms(3, 30, 0)), + ), + ( + (2024, 11, 3, 1, 59, 59, 999_999_999), + o_fold(-Offset::hms(2, 30, 0), -Offset::hms(3, 30, 0)), + ), + ( + (2024, 11, 3, 2, 0, 0, 0), + o_unambiguous(-Offset::hms(3, 30, 0)), + ), + ], + ), + // This time zone has an interesting transition where it jumps + // backwards a full day at 1867-10-19T15:30:00. + ( + "America/Sitka", + &[ + ((1969, 12, 31, 16, 0, 0, 0), unambiguous(-8)), + ( + (-9999, 1, 2, 16, 58, 46, 0), + o_unambiguous(Offset::hms(14, 58, 47)), + ), + ( + (1867, 10, 18, 15, 29, 59, 0), + o_unambiguous(Offset::hms(14, 58, 47)), + ), + ( + (1867, 10, 18, 15, 30, 0, 0), + // A fold of 24 hours!!! + o_fold( + Offset::hms(14, 58, 47), + -Offset::hms(9, 1, 13), + ), + ), + ( + (1867, 10, 19, 15, 29, 59, 999_999_999), + // Still in the fold... 
+ o_fold( + Offset::hms(14, 58, 47), + -Offset::hms(9, 1, 13), + ), + ), + ( + (1867, 10, 19, 15, 30, 0, 0), + // Finally out. + o_unambiguous(-Offset::hms(9, 1, 13)), + ), + ], + ), + // As with to_datetime, we test every possible transition + // point here since this time zone has a small number of them. + ( + "Pacific/Honolulu", + &[ + ( + (1896, 1, 13, 11, 59, 59, 0), + o_unambiguous(-Offset::hms(10, 31, 26)), + ), + ( + (1896, 1, 13, 12, 0, 0, 0), + o_gap( + -Offset::hms(10, 31, 26), + -Offset::hms(10, 30, 0), + ), + ), + ( + (1896, 1, 13, 12, 1, 25, 0), + o_gap( + -Offset::hms(10, 31, 26), + -Offset::hms(10, 30, 0), + ), + ), + ( + (1896, 1, 13, 12, 1, 26, 0), + o_unambiguous(-Offset::hms(10, 30, 0)), + ), + ( + (1933, 4, 30, 1, 59, 59, 0), + o_unambiguous(-Offset::hms(10, 30, 0)), + ), + ( + (1933, 4, 30, 2, 0, 0, 0), + o_gap(-Offset::hms(10, 30, 0), -Offset::hms(9, 30, 0)), + ), + ( + (1933, 4, 30, 2, 59, 59, 0), + o_gap(-Offset::hms(10, 30, 0), -Offset::hms(9, 30, 0)), + ), + ( + (1933, 4, 30, 3, 0, 0, 0), + o_unambiguous(-Offset::hms(9, 30, 0)), + ), + ( + (1933, 5, 21, 10, 59, 59, 0), + o_unambiguous(-Offset::hms(9, 30, 0)), + ), + ( + (1933, 5, 21, 11, 0, 0, 0), + o_fold( + -Offset::hms(9, 30, 0), + -Offset::hms(10, 30, 0), + ), + ), + ( + (1933, 5, 21, 11, 59, 59, 0), + o_fold( + -Offset::hms(9, 30, 0), + -Offset::hms(10, 30, 0), + ), + ), + ( + (1933, 5, 21, 12, 0, 0, 0), + o_unambiguous(-Offset::hms(10, 30, 0)), + ), + ( + (1942, 2, 9, 1, 59, 59, 0), + o_unambiguous(-Offset::hms(10, 30, 0)), + ), + ( + (1942, 2, 9, 2, 0, 0, 0), + o_gap(-Offset::hms(10, 30, 0), -Offset::hms(9, 30, 0)), + ), + ( + (1942, 2, 9, 2, 59, 59, 0), + o_gap(-Offset::hms(10, 30, 0), -Offset::hms(9, 30, 0)), + ), + ( + (1942, 2, 9, 3, 0, 0, 0), + o_unambiguous(-Offset::hms(9, 30, 0)), + ), + ( + (1945, 8, 14, 13, 29, 59, 0), + o_unambiguous(-Offset::hms(9, 30, 0)), + ), + ( + (1945, 8, 14, 13, 30, 0, 0), + o_unambiguous(-Offset::hms(9, 30, 0)), + ), + ( + (1945, 8, 14, 13, 30, 
1, 0), + o_unambiguous(-Offset::hms(9, 30, 0)), + ), + ( + (1945, 9, 30, 0, 59, 59, 0), + o_unambiguous(-Offset::hms(9, 30, 0)), + ), + ( + (1945, 9, 30, 1, 0, 0, 0), + o_fold( + -Offset::hms(9, 30, 0), + -Offset::hms(10, 30, 0), + ), + ), + ( + (1945, 9, 30, 1, 59, 59, 0), + o_fold( + -Offset::hms(9, 30, 0), + -Offset::hms(10, 30, 0), + ), + ), + ( + (1945, 9, 30, 2, 0, 0, 0), + o_unambiguous(-Offset::hms(10, 30, 0)), + ), + ( + (1947, 6, 8, 1, 59, 59, 0), + o_unambiguous(-Offset::hms(10, 30, 0)), + ), + ( + (1947, 6, 8, 2, 0, 0, 0), + o_gap(-Offset::hms(10, 30, 0), -offset(10)), + ), + ( + (1947, 6, 8, 2, 29, 59, 0), + o_gap(-Offset::hms(10, 30, 0), -offset(10)), + ), + ((1947, 6, 8, 2, 30, 0, 0), unambiguous(-10)), + ], + ), + ]; + let db = ConcatenatedTzif::open(ANDROID_CONCATENATED_TZIF).unwrap(); + let (mut buf1, mut buf2) = (alloc::vec![], alloc::vec![]); + for &(tzname, datetimes_to_ambiguous) in tests { + let tz = db.get(tzname, &mut buf1, &mut buf2).unwrap().unwrap(); + for &(datetime, ambiguous_kind) in datetimes_to_ambiguous { + let (year, month, day, hour, min, sec, nano) = datetime; + let dt = date(year, month, day).at(hour, min, sec, nano); + let got = tz.to_ambiguous_zoned(dt); + assert_eq!( + got.offset(), + ambiguous_kind, + "\nTZ: {tzname}\ndatetime: \ + {year:04}-{month:02}-{day:02}T\ + {hour:02}:{min:02}:{sec:02}.{nano:09}", + ); + } + } + } + + // Copied from src/tz/mod.rs. 
+ #[test] + fn time_zone_tzif_to_datetime() { + let o = |hours| offset(hours); + let tests: &[(&str, &[_])] = &[ + ( + "America/New_York", + &[ + ((0, 0), o(-5), "EST", (1969, 12, 31, 19, 0, 0, 0)), + ( + (1710052200, 0), + o(-5), + "EST", + (2024, 3, 10, 1, 30, 0, 0), + ), + ( + (1710053999, 999_999_999), + o(-5), + "EST", + (2024, 3, 10, 1, 59, 59, 999_999_999), + ), + ((1710054000, 0), o(-4), "EDT", (2024, 3, 10, 3, 0, 0, 0)), + ( + (1710055800, 0), + o(-4), + "EDT", + (2024, 3, 10, 3, 30, 0, 0), + ), + ((1730610000, 0), o(-4), "EDT", (2024, 11, 3, 1, 0, 0, 0)), + ( + (1730611800, 0), + o(-4), + "EDT", + (2024, 11, 3, 1, 30, 0, 0), + ), + ( + (1730613599, 999_999_999), + o(-4), + "EDT", + (2024, 11, 3, 1, 59, 59, 999_999_999), + ), + ((1730613600, 0), o(-5), "EST", (2024, 11, 3, 1, 0, 0, 0)), + ( + (1730615400, 0), + o(-5), + "EST", + (2024, 11, 3, 1, 30, 0, 0), + ), + ], + ), + ( + "Australia/Tasmania", + &[ + ((0, 0), o(11), "AEDT", (1970, 1, 1, 11, 0, 0, 0)), + ( + (1728142200, 0), + o(10), + "AEST", + (2024, 10, 6, 1, 30, 0, 0), + ), + ( + (1728143999, 999_999_999), + o(10), + "AEST", + (2024, 10, 6, 1, 59, 59, 999_999_999), + ), + ( + (1728144000, 0), + o(11), + "AEDT", + (2024, 10, 6, 3, 0, 0, 0), + ), + ( + (1728145800, 0), + o(11), + "AEDT", + (2024, 10, 6, 3, 30, 0, 0), + ), + ((1712415600, 0), o(11), "AEDT", (2024, 4, 7, 2, 0, 0, 0)), + ( + (1712417400, 0), + o(11), + "AEDT", + (2024, 4, 7, 2, 30, 0, 0), + ), + ( + (1712419199, 999_999_999), + o(11), + "AEDT", + (2024, 4, 7, 2, 59, 59, 999_999_999), + ), + ((1712419200, 0), o(10), "AEST", (2024, 4, 7, 2, 0, 0, 0)), + ( + (1712421000, 0), + o(10), + "AEST", + (2024, 4, 7, 2, 30, 0, 0), + ), + ], + ), + // Pacific/Honolulu is small eough that we just test every + // possible instant before, at and after each transition. 
+ ( + "Pacific/Honolulu", + &[ + ( + (-2334101315, 0), + -Offset::hms(10, 31, 26), + "LMT", + (1896, 1, 13, 11, 59, 59, 0), + ), + ( + (-2334101314, 0), + -Offset::hms(10, 30, 0), + "HST", + (1896, 1, 13, 12, 1, 26, 0), + ), + ( + (-2334101313, 0), + -Offset::hms(10, 30, 0), + "HST", + (1896, 1, 13, 12, 1, 27, 0), + ), + ( + (-1157283001, 0), + -Offset::hms(10, 30, 0), + "HST", + (1933, 4, 30, 1, 59, 59, 0), + ), + ( + (-1157283000, 0), + -Offset::hms(9, 30, 0), + "HDT", + (1933, 4, 30, 3, 0, 0, 0), + ), + ( + (-1157282999, 0), + -Offset::hms(9, 30, 0), + "HDT", + (1933, 4, 30, 3, 0, 1, 0), + ), + ( + (-1155436201, 0), + -Offset::hms(9, 30, 0), + "HDT", + (1933, 5, 21, 11, 59, 59, 0), + ), + ( + (-1155436200, 0), + -Offset::hms(10, 30, 0), + "HST", + (1933, 5, 21, 11, 0, 0, 0), + ), + ( + (-1155436199, 0), + -Offset::hms(10, 30, 0), + "HST", + (1933, 5, 21, 11, 0, 1, 0), + ), + ( + (-880198201, 0), + -Offset::hms(10, 30, 0), + "HST", + (1942, 2, 9, 1, 59, 59, 0), + ), + ( + (-880198200, 0), + -Offset::hms(9, 30, 0), + "HWT", + (1942, 2, 9, 3, 0, 0, 0), + ), + ( + (-880198199, 0), + -Offset::hms(9, 30, 0), + "HWT", + (1942, 2, 9, 3, 0, 1, 0), + ), + ( + (-769395601, 0), + -Offset::hms(9, 30, 0), + "HWT", + (1945, 8, 14, 13, 29, 59, 0), + ), + ( + (-769395600, 0), + -Offset::hms(9, 30, 0), + "HPT", + (1945, 8, 14, 13, 30, 0, 0), + ), + ( + (-769395599, 0), + -Offset::hms(9, 30, 0), + "HPT", + (1945, 8, 14, 13, 30, 1, 0), + ), + ( + (-765376201, 0), + -Offset::hms(9, 30, 0), + "HPT", + (1945, 9, 30, 1, 59, 59, 0), + ), + ( + (-765376200, 0), + -Offset::hms(10, 30, 0), + "HST", + (1945, 9, 30, 1, 0, 0, 0), + ), + ( + (-765376199, 0), + -Offset::hms(10, 30, 0), + "HST", + (1945, 9, 30, 1, 0, 1, 0), + ), + ( + (-712150201, 0), + -Offset::hms(10, 30, 0), + "HST", + (1947, 6, 8, 1, 59, 59, 0), + ), + // At this point, we hit the last transition and the POSIX + // TZ string takes over. 
+ ( + (-712150200, 0), + -Offset::hms(10, 0, 0), + "HST", + (1947, 6, 8, 2, 30, 0, 0), + ), + ( + (-712150199, 0), + -Offset::hms(10, 0, 0), + "HST", + (1947, 6, 8, 2, 30, 1, 0), + ), + ], + ), + // This time zone has an interesting transition where it jumps + // backwards a full day at 1867-10-19T15:30:00. + ( + "America/Sitka", + &[ + ((0, 0), o(-8), "PST", (1969, 12, 31, 16, 0, 0, 0)), + ( + (-377705023201, 0), + Offset::hms(14, 58, 47), + "LMT", + (-9999, 1, 2, 16, 58, 46, 0), + ), + ( + (-3225223728, 0), + Offset::hms(14, 58, 47), + "LMT", + (1867, 10, 19, 15, 29, 59, 0), + ), + // Notice the 24 hour time jump backwards a whole day! + ( + (-3225223727, 0), + -Offset::hms(9, 1, 13), + "LMT", + (1867, 10, 18, 15, 30, 0, 0), + ), + ( + (-3225223726, 0), + -Offset::hms(9, 1, 13), + "LMT", + (1867, 10, 18, 15, 30, 1, 0), + ), + ], + ), + ]; + let db = ConcatenatedTzif::open(ANDROID_CONCATENATED_TZIF).unwrap(); + let (mut buf1, mut buf2) = (alloc::vec![], alloc::vec![]); + for &(tzname, timestamps_to_datetimes) in tests { + let tz = db.get(tzname, &mut buf1, &mut buf2).unwrap().unwrap(); + for &((unix_sec, unix_nano), offset, abbrev, datetime) in + timestamps_to_datetimes + { + let (year, month, day, hour, min, sec, nano) = datetime; + let timestamp = Timestamp::new(unix_sec, unix_nano).unwrap(); + let (got_offset, _, got_abbrev) = tz.to_offset(timestamp); + assert_eq!( + got_offset, offset, + "\nTZ={tzname}, timestamp({unix_sec}, {unix_nano})", + ); + assert_eq!( + got_abbrev, abbrev, + "\nTZ={tzname}, timestamp({unix_sec}, {unix_nano})", + ); + assert_eq!( + got_offset.to_datetime(timestamp), + date(year, month, day).at(hour, min, sec, nano), + "\nTZ={tzname}, timestamp({unix_sec}, {unix_nano})", + ); + } + } + } + + #[test] + fn read_all_time_zones() { + let db = ConcatenatedTzif::open(ANDROID_CONCATENATED_TZIF).unwrap(); + let available = db.available(&mut alloc::vec![]).unwrap(); + let (mut buf1, mut buf2) = (alloc::vec![], alloc::vec![]); + for tzname in 
available.iter() { + let tz = db.get(tzname, &mut buf1, &mut buf2).unwrap().unwrap(); + assert_eq!(tzname, tz.iana_name().unwrap()); + } + } + + #[test] + fn available_len() { + let db = ConcatenatedTzif::open(ANDROID_CONCATENATED_TZIF).unwrap(); + let available = db.available(&mut alloc::vec![]).unwrap(); + assert_eq!(596, available.len()); + for window in available.windows(2) { + let (x1, x2) = (&window[0], &window[1]); + assert!(x1 < x2, "{x1} is not less than {x2}"); + } + } +} diff --git a/src/tz/db/bundled/disabled.rs b/src/tz/db/bundled/disabled.rs index f0bb35bd..6432f330 100644 --- a/src/tz/db/bundled/disabled.rs +++ b/src/tz/db/bundled/disabled.rs @@ -1,6 +1,6 @@ use crate::tz::TimeZone; -#[derive(Clone, Debug)] +#[derive(Clone)] pub(crate) struct BundledZoneInfo; impl BundledZoneInfo { @@ -23,3 +23,9 @@ impl BundledZoneInfo { true } } + +impl core::fmt::Debug for BundledZoneInfo { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!(f, "Bundled(unavailable)") + } +} diff --git a/src/tz/db/bundled/enabled.rs b/src/tz/db/bundled/enabled.rs index ff63c6a5..33e5daaa 100644 --- a/src/tz/db/bundled/enabled.rs +++ b/src/tz/db/bundled/enabled.rs @@ -2,7 +2,6 @@ use alloc::{string::String, vec::Vec}; use crate::tz::TimeZone; -#[derive(Debug)] pub(crate) struct BundledZoneInfo; impl BundledZoneInfo { @@ -46,6 +45,12 @@ impl BundledZoneInfo { } } +impl core::fmt::Debug for BundledZoneInfo { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!(f, "Bundled(available)") + } +} + fn available() -> impl Iterator { #[cfg(feature = "tzdb-bundle-always")] { @@ -82,7 +87,7 @@ fn lookup(name: &str) -> Option<(&'static str, &'static [u8])> { mod global { use std::{string::String, string::ToString, sync::RwLock, vec::Vec}; - use crate::tz::TimeZone; + use crate::{tz::TimeZone, util::utf8}; static CACHED_ZONES: RwLock = RwLock::new(CachedZones { zones: Vec::new() }); @@ -128,7 +133,7 @@ mod global { fn get_zone_index(&self, 
query: &str) -> Result { self.zones.binary_search_by(|entry| { - cmp_ignore_ascii_case(&entry.name, query) + utf8::cmp_ignore_ascii_case(&entry.name, query) }) } @@ -142,11 +147,4 @@ mod global { name: String, tz: TimeZone, } - - /// Like std's `eq_ignore_ascii_case`, but returns a full `Ordering`. - fn cmp_ignore_ascii_case(s1: &str, s2: &str) -> core::cmp::Ordering { - let it1 = s1.as_bytes().iter().map(|&b| b.to_ascii_lowercase()); - let it2 = s2.as_bytes().iter().map(|&b| b.to_ascii_lowercase()); - it1.cmp(it2) - } } diff --git a/src/tz/db/concatenated/disabled.rs b/src/tz/db/concatenated/disabled.rs new file mode 100644 index 00000000..3ff18330 --- /dev/null +++ b/src/tz/db/concatenated/disabled.rs @@ -0,0 +1,47 @@ +use crate::tz::TimeZone; + +#[derive(Clone)] +pub(crate) struct Concatenated; + +impl Concatenated { + pub(crate) fn from_env() -> Concatenated { + Concatenated + } + + #[cfg(feature = "std")] + pub(crate) fn from_path( + path: &std::path::Path, + ) -> Result { + Err(crate::error::err!( + "system concatenated tzdb unavailable: \ + crate feature `tzdb-concatenated` is disabled, \ + opening tzdb at {path} has therefore failed", + path = path.display(), + )) + } + + pub(crate) fn none() -> Concatenated { + Concatenated + } + + pub(crate) fn reset(&self) {} + + pub(crate) fn get(&self, _query: &str) -> Option { + None + } + + #[cfg(feature = "alloc")] + pub(crate) fn available(&self) -> alloc::vec::Vec { + alloc::vec::Vec::new() + } + + pub(crate) fn is_definitively_empty(&self) -> bool { + true + } +} + +impl core::fmt::Debug for Concatenated { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!(f, "Concatenated(unavailable)") + } +} diff --git a/src/tz/db/concatenated/enabled.rs b/src/tz/db/concatenated/enabled.rs new file mode 100644 index 00000000..2e726898 --- /dev/null +++ b/src/tz/db/concatenated/enabled.rs @@ -0,0 +1,570 @@ +use alloc::{ + string::{String, ToString}, + vec, + vec::Vec, +}; + +use std::{ + 
ffi::OsString, + fs::File, + path::{Path, PathBuf}, + sync::{Arc, RwLock}, + time::Duration, +}; + +use crate::{ + error::{err, Error}, + timestamp::Timestamp, + tz::{concatenated::ConcatenatedTzif, TimeZone}, + util::{self, array_str::ArrayStr, cache::Expiration, utf8}, +}; + +const DEFAULT_TTL: Duration = Duration::new(5 * 60, 0); + +/// The places to look for a concatenated `tzdata` file. +static TZDATA_LOCATIONS: &[TzdataLocation] = &[ + TzdataLocation::Env { + name: "ANDROID_ROOT", + default: "/system", + suffix: "usr/share/zoneinfo/tzdata", + }, + TzdataLocation::Env { + name: "ANDROID_DATA", + default: "/data/misc", + suffix: "zoneinfo/current/tzdata", + }, +]; + +pub(crate) struct Concatenated { + path: Option, + names: Option, + zones: RwLock, +} + +impl Concatenated { + pub(crate) fn from_env() -> Concatenated { + let mut attempted = vec![]; + for loc in TZDATA_LOCATIONS { + let path = loc.to_path_buf(); + trace!( + "opening concatenated tzdata database at {}", + path.display() + ); + match Concatenated::from_path(&path) { + Ok(db) => return db, + Err(_err) => { + trace!("failed opening {}: {_err}", path.display()); + } + } + attempted.push(path.to_string_lossy().into_owned()); + } + debug!( + "could not find concatenated tzdata database at any of the \ + following paths: {}", + attempted.join(", "), + ); + Concatenated::none() + } + + pub(crate) fn from_path(path: &Path) -> Result { + let names = Some(Names::new(path)?); + let zones = RwLock::new(CachedZones::new()); + Ok(Concatenated { path: Some(path.to_path_buf()), names, zones }) + } + + /// Creates a "dummy" zoneinfo database in which all lookups fail. 
+ pub(crate) fn none() -> Concatenated { + let path = None; + let names = None; + let zones = RwLock::new(CachedZones::new()); + Concatenated { path, names, zones } + } + + pub(crate) fn reset(&self) { + let mut zones = self.zones.write().unwrap(); + if let Some(ref names) = self.names { + names.reset(); + } + zones.reset(); + } + + pub(crate) fn get(&self, query: &str) -> Option { + // We just always assume UTC exists and map it to our special const + // TimeZone::UTC value. + if query == "UTC" { + return Some(TimeZone::UTC); + } + let path = self.path.as_ref()?; + // The fast path is when the query matches a pre-existing unexpired + // time zone. + { + let zones = self.zones.read().unwrap(); + if let Some(czone) = zones.get(query) { + if !czone.is_expired() { + trace!( + "for time zone query `{query}`, \ + found cached zone `{}` \ + (expiration={}, last_modified={:?})", + czone.tz.diagnostic_name(), + czone.expiration, + czone.last_modified, + ); + return Some(czone.tz.clone()); + } + } + } + // At this point, one of three possible cases is true: + // + // 1. The given query does not match any time zone in this database. + // 2. A time zone exists, but isn't cached. + // 3. A time zone exists and is cached, but needs to be revalidated. + // + // While (3) is probably the common case since our TTLs are pretty + // short, both (2) and (3) require write access. Thus we rule out (1) + // before acquiring a write lock on the entire database. Plus, we'll + // need the zone info for case (2) and possibly for (3) if cache + // revalidation fails. + // + // I feel kind of bad about all this because it seems to me like there + // is too much work being done while holding on to the write lock. + // In particular, it seems like bad juju to do any I/O of any kind + // while holding any lock at all. I think I could design something + // that avoids doing I/O while holding a lock, but it seems a lot more + // complicated.
(And what happens if the I/O becomes outdated by the + // time you acquire the lock?) + let mut zones = self.zones.write().unwrap(); + let ttl = zones.ttl; + match zones.get_zone_index(query) { + Ok(i) => { + let czone = &mut zones.zones[i]; + if czone.revalidate(path, ttl) { + // Metadata on the file didn't change, so we assume the + // file hasn't either. + return Some(czone.tz.clone()); + } + // Revalidation failed. Re-read the TZif data. + let (scratch1, scratch2) = zones.scratch(); + let czone = match CachedTimeZone::new( + path, query, ttl, scratch1, scratch2, + ) { + Ok(Some(czone)) => czone, + Ok(None) => return None, + Err(_err) => { + warn!( + "failed to re-cache time zone {query} \ + from {path}: {_err}", + path = path.display(), + ); + return None; + } + }; + let tz = czone.tz.clone(); + zones.zones[i] = czone; + Some(tz) + } + Err(i) => { + let (scratch1, scratch2) = zones.scratch(); + let czone = match CachedTimeZone::new( + path, query, ttl, scratch1, scratch2, + ) { + Ok(Some(czone)) => czone, + Ok(None) => return None, + Err(_err) => { + warn!( + "failed to cache time zone {query} \ + from {path}: {_err}", + path = path.display(), + ); + return None; + } + }; + let tz = czone.tz.clone(); + zones.zones.insert(i, czone); + Some(tz) + } + } + } + + pub(crate) fn available(&self) -> Vec { + let Some(path) = self.path.as_ref() else { return vec![] }; + let Some(names) = self.names.as_ref() else { return vec![] }; + names.available(path) + } + + pub(crate) fn is_definitively_empty(&self) -> bool { + self.names.is_none() + } +} + +impl core::fmt::Debug for Concatenated { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!(f, "Concatenated(")?; + if let Some(ref path) = self.path { + write!(f, "{}", path.display())?; + } else { + write!(f, "unavailable")?; + } + write!(f, ")") + } +} + +#[derive(Debug)] +struct CachedZones { + zones: Vec, + ttl: Duration, + scratch1: Vec, + scratch2: Vec, +} + +impl CachedZones { + const 
DEFAULT_TTL: Duration = DEFAULT_TTL; + + fn new() -> CachedZones { + CachedZones { + zones: vec![], + ttl: CachedZones::DEFAULT_TTL, + scratch1: vec![], + scratch2: vec![], + } + } + + fn get(&self, query: &str) -> Option<&CachedTimeZone> { + self.get_zone_index(query).ok().map(|i| &self.zones[i]) + } + + fn get_zone_index(&self, query: &str) -> Result { + self.zones.binary_search_by(|zone| { + utf8::cmp_ignore_ascii_case(zone.name(), query) + }) + } + + fn reset(&mut self) { + self.zones.clear(); + } + + fn scratch(&mut self) -> (&mut Vec, &mut Vec) { + (&mut self.scratch1, &mut self.scratch2) + } +} + +#[derive(Clone, Debug)] +struct CachedTimeZone { + tz: TimeZone, + expiration: Expiration, + last_modified: Option, +} + +impl CachedTimeZone { + /// Create a new cached time zone. + /// + /// `path` should be a concatenated `tzdata` file. `query` is the IANA time + /// zone identifier we're looking for. The `ttl` says how long + /// the cached time zone should minimally remain fresh for. + /// + /// The `scratch1` and `scratch2` given are used to help amortize + /// allocation when deserializing TZif data from the concatenated `tzdata` + /// file. + /// + /// If no such time zone exists and no other error occurred, then + /// `Ok(None)` is returned. + fn new( + path: &Path, + query: &str, + ttl: Duration, + scratch1: &mut Vec, + scratch2: &mut Vec, + ) -> Result, Error> { + let file = File::open(path).map_err(|e| Error::io(e).path(path))?; + let db = ConcatenatedTzif::open(&file)?; + let Some(tz) = db.get(query, scratch1, scratch2)? else { + return Ok(None); + }; + let last_modified = util::fs::last_modified_from_file(path, &file); + let expiration = Expiration::after(ttl); + Ok(Some(CachedTimeZone { tz, expiration, last_modified })) + } + + /// Returns true if this time zone has gone stale and should, at minimum, + /// be revalidated.
+ fn is_expired(&self) -> bool { + self.expiration.is_expired() + } + + /// Returns the IANA time zone identifier of this cached time zone. + fn name(&self) -> &str { + // OK because `ConcatenatedTzif` guarantees all `TimeZone` values it + // returns have an IANA name. + self.tz.iana_name().unwrap() + } + + /// Attempts to revalidate this cached time zone. + /// + /// Upon successful revalidation (that is, the cached time zone is still + /// fresh and okay to use), this returns true. Otherwise, the cached time + /// zone should be considered stale and must be re-created. + /// + /// Note that technically another layer of revalidation could be done. + /// For example, we could keep a checksum of the TZif data, and only + /// consider rebuilding the time zone when the checksum changes. But I + /// think the last modified metadata will in practice be good enough, and + /// parsing TZif data should be quite fast. + /// + /// `path` should be a concatenated `tzdata` file. + fn revalidate(&mut self, path: &Path, ttl: Duration) -> bool { + // If we started with no last modified timestamp, then I guess we + // should always fail revalidation? I suppose a case could be made to + // do the opposite: always pass revalidation. + let Some(old_last_modified) = self.last_modified else { + trace!( + "revalidation for {name} in {path} failed because \ + old last modified time is unavailable", + name = self.name(), + path = path.display(), + ); + return false; + }; + let Some(new_last_modified) = util::fs::last_modified_from_path(path) + else { + trace!( + "revalidation for {name} in {path} failed because \ + new last modified time is unavailable", + name = self.name(), + path = path.display(), + ); + return false; + }; + // We consider any change to invalidate cache. 
+ if old_last_modified != new_last_modified { + trace!( + "revalidation for {name} in {path} failed because \ + last modified times do not match: old = {old} != {new} = new", + name = self.name(), + path = path.display(), + old = old_last_modified, + new = new_last_modified, + ); + return false; + } + trace!( + "revalidation for {name} in {path} succeeded because \ + last modified times match: old = {old} == {new} = new", + name = self.name(), + path = path.display(), + old = old_last_modified, + new = new_last_modified, + ); + self.expiration = Expiration::after(ttl); + true + } +} + +/// A collection of time zone names extracted from a concatenated tzdata file. +/// +/// This type is responsible not just for providing the names, but also for +/// updating them periodically. +/// +/// Every name _should_ correspond to an entry in the data block of the +/// corresponding `tzdata` file, but we generally don't take advantage of this. +/// The reason is that the file could theoretically change between when we +/// extract the names and when we do a TZif lookup later. This is all perfectly +/// manageable, but it should only be done if there's a benchmark demanding +/// more effort be spent here. As it stands, we do have a rudimentary caching +/// mechanism, so not all time zone lookups go through this slower path. (This +/// is also why `Names` has no lookup routine. There's just a routine to return +/// all names.) +#[derive(Debug)] +struct Names { + inner: RwLock, +} + +#[derive(Debug)] +struct NamesInner { + /// All available names from the `tzdata` file. + names: Vec>, + /// The version string read from the `tzdata` file. + version: ArrayStr<5>, + /// Scratch space used to help amortize allocation when extracting names + /// from a `tzdata` file. + scratch: Vec, + /// The expiration time of these cached names. + /// + /// Note that this is a necessary but not sufficient criterion for + /// invalidating the cached value.
+ ttl: Duration, + /// The time at which the data in `names` becomes stale. + expiration: Expiration, +} + +impl Names { + /// See comments in `tz/db/zoneinfo/enabled.rs` about this. We just copied + /// it from there. + const DEFAULT_TTL: Duration = DEFAULT_TTL; + + /// Create a new collection of names from the concatenated `tzdata` file + /// path given. + /// + /// If no names of time zones could be found in the given file, then + /// an error is returned. + fn new(path: &Path) -> Result { + let path = path.to_path_buf(); + let mut scratch = vec![]; + let (names, version) = read_names_and_version(&path, &mut scratch)?; + trace!( + "found concatenated tzdata at {path} \ + with version {version} and {len} \ + IANA time zone identifiers", + path = path.display(), + len = names.len(), + ); + let ttl = Names::DEFAULT_TTL; + let expiration = Expiration::after(ttl); + let inner = NamesInner { names, version, scratch, ttl, expiration }; + Ok(Names { inner: RwLock::new(inner) }) + } + + /// Returns all available time zone names after attempting a refresh of + /// the underlying data if it's stale. + fn available(&self, path: &Path) -> Vec { + let mut inner = self.inner.write().unwrap(); + inner.attempt_refresh(path); + inner.available() + } + + fn reset(&self) { + self.inner.write().unwrap().reset(); + } +} + +impl NamesInner { + /// Returns all available time zone names. + fn available(&self) -> Vec { + self.names.iter().map(|name| name.to_string()).collect() + } + + /// Attempts a refresh, but only follows through if the TTL has been + /// exceeded. + /// + /// The caller must ensure that the other cache invalidation criteria + /// have been upheld. For example, this should only be called for a missed + /// zone name lookup. + fn attempt_refresh(&mut self, path: &Path) { + if self.expiration.is_expired() { + self.refresh(path); + } + } + + /// Forcefully refreshes the cached names with possibly new data from disk.
+ /// If an error occurs when fetching the names, then no names are updated + /// (but the `expiration` is updated). This will also emit a warning log on + /// failure. + fn refresh(&mut self, path: &Path) { + // PERF: Should we try to move this tzdb handling to run outside of a + // lock? It probably happens pretty rarely, so it might not matter. + let result = read_names_and_version(path, &mut self.scratch); + self.expiration = Expiration::after(self.ttl); + match result { + Ok((names, version)) => { + trace!( + "refreshed concatenated tzdata at {path} \ + with version {version} and {len} \ + IANA time zone identifiers", + path = path.display(), + len = names.len(), + ); + self.names = names; + self.version = version; + } + Err(_err) => { + warn!( + "failed to refresh concatenated time zone name cache \ + for {path}: {_err}", + path = path.display(), + ) + } + } + } + + /// Resets the state such that the next lookup is guaranteed to force a + /// cache refresh, and that it is impossible for any data to be stale. + fn reset(&mut self) { + // This will force the next lookup to fail. + self.names.clear(); + // And this will force the next failed lookup to result in a refresh. + self.expiration = Expiration::expired(); + } +} + +/// A type representing how to find a `tzdata` file. +/// +/// This currently only supports an Android-centric lookup via env vars, but if +/// we wanted to check a fixed path like we do for `ZoneInfo`, then adding a +/// `Fixed` variant here would be appropriate. +#[derive(Debug)] +enum TzdataLocation { + Env { name: &'static str, default: &'static str, suffix: &'static str }, +} + +impl TzdataLocation { + /// Converts this location to an actual path, which might involve an + /// environment variable lookup.
+ fn to_path_buf(&self) -> PathBuf { + match *self { + TzdataLocation::Env { name, default, suffix } => { + let var = std::env::var_os(name) + .unwrap_or_else(|| OsString::from(default)); + let prefix = PathBuf::from(var); + prefix.join(suffix) + } + } + } +} + +/// Reads only the IANA time zone identifiers from the given path (and the +/// version of the database). +/// +/// The `scratch` given is used to help amortize allocation when deserializing +/// names from the concatenated `tzdata` file. +/// +/// This returns an error if reading was successful but no names were found. +fn read_names_and_version( + path: &Path, + scratch: &mut Vec, +) -> Result<(Vec>, ArrayStr<5>), Error> { + let file = File::open(path).map_err(|e| Error::io(e).path(path))?; + let db = ConcatenatedTzif::open(file)?; + let names: Vec> = + db.available(scratch)?.into_iter().map(Arc::from).collect(); + if names.is_empty() { + return Err(err!( + "found no IANA time zone identifiers in \ + concatenated tzdata file at {path}", + path = path.display(), + )); + } + Ok((names, db.version())) +} + +#[cfg(test)] +mod tests { + use super::*; + + /// DEBUG COMMAND + /// + /// Takes environment variable `JIFF_DEBUG_CONCATENATED_TZDATA` as input and + /// prints a list of all time zone names in the concatenated `tzdata` file + /// it points to (one per line). + /// + /// Callers may also set `RUST_LOG` to get extra debugging output.
+ #[test] + fn debug_tzdata_list() -> anyhow::Result<()> { + let _ = crate::logging::Logger::init(); + + const ENV: &str = "JIFF_DEBUG_CONCATENATED_TZDATA"; + let Some(val) = std::env::var_os(ENV) else { return Ok(()) }; + let path = PathBuf::from(val); + let db = Concatenated::from_path(&path)?; + for name in db.available() { + std::eprintln!("{name}"); + } + Ok(()) + } +} diff --git a/src/tz/db/concatenated/mod.rs b/src/tz/db/concatenated/mod.rs new file mode 100644 index 00000000..f6c7eba3 --- /dev/null +++ b/src/tz/db/concatenated/mod.rs @@ -0,0 +1,8 @@ +pub(crate) use self::inner::*; + +#[cfg(not(feature = "tzdb-concatenated"))] +#[path = "disabled.rs"] +mod inner; +#[cfg(feature = "tzdb-concatenated")] +#[path = "enabled.rs"] +mod inner; diff --git a/src/tz/db/mod.rs b/src/tz/db/mod.rs index 0d5ed756..cdaa3f0e 100644 --- a/src/tz/db/mod.rs +++ b/src/tz/db/mod.rs @@ -4,9 +4,12 @@ use crate::{ util::sync::Arc, }; -use self::{bundled::BundledZoneInfo, zoneinfo::ZoneInfo}; +use self::{ + bundled::BundledZoneInfo, concatenated::Concatenated, zoneinfo::ZoneInfo, +}; mod bundled; +mod concatenated; mod zoneinfo; /// Returns a copy of the global [`TimeZoneDatabase`]. @@ -190,6 +193,7 @@ pub struct TimeZoneDatabase { #[cfg_attr(not(feature = "alloc"), derive(Clone))] struct TimeZoneDatabaseInner { zoneinfo: ZoneInfo, + concatenated: Concatenated, bundled: BundledZoneInfo, } @@ -242,9 +246,17 @@ impl TimeZoneDatabase { /// not have a canonical installation of the Time Zone Database. 
pub fn from_env() -> TimeZoneDatabase { let zoneinfo = ZoneInfo::from_env(); + let concatenated = Concatenated::from_env(); let bundled = BundledZoneInfo::new(); - let inner = TimeZoneDatabaseInner { zoneinfo, bundled }; - TimeZoneDatabase { inner: Some(Arc::new(inner)) } + let inner = TimeZoneDatabaseInner { zoneinfo, concatenated, bundled }; + let db = TimeZoneDatabase { inner: Some(Arc::new(inner)) }; + if db.is_definitively_empty() { + warn!( + "could not find zoneinfo, concatenated tzdata or \ + bundled time zone database", + ); + } + db } /// Returns a time zone database initialized from the given directory. @@ -266,10 +278,63 @@ impl TimeZoneDatabase { pub fn from_dir>( path: P, ) -> Result { - let zoneinfo = ZoneInfo::from_dir(path.as_ref())?; + let path = path.as_ref(); + let zoneinfo = ZoneInfo::from_dir(path)?; + let concatenated = Concatenated::none(); let bundled = BundledZoneInfo::new(); - let inner = TimeZoneDatabaseInner { zoneinfo, bundled }; - Ok(TimeZoneDatabase { inner: Some(Arc::new(inner)) }) + let inner = TimeZoneDatabaseInner { zoneinfo, concatenated, bundled }; + let db = TimeZoneDatabase { inner: Some(Arc::new(inner)) }; + if db.is_definitively_empty() { + warn!( + "could not find zoneinfo data at directory {path} \ + (and there is no bundled time zone database)", + path = path.display(), + ); + } + Ok(db) + } + + /// Returns a time zone database initialized from a path pointing to a + /// concatenated `tzdata` file. This type of format is only known to be + /// found on Android environments. The specific format for this file isn't + /// defined formally anywhere, but Jiff parses the same format supported + /// by the [Android Platform]. + /// + /// Unlike [`TimeZoneDatabase::from_env`], this always attempts to look for + /// a copy of the Time Zone Database at the path given. And if it + /// fails to find one at that path, then an error is returned. 
+ /// + /// Basically, you should use this when you need to use a _specific_ + /// copy of the Time Zone Database in its concatenated format, and use + /// `TimeZoneDatabase::from_env` when you just want Jiff to try and "do the + /// right thing for you." (`TimeZoneDatabase::from_env` will attempt to + /// automatically detect the presence of a system concatenated `tzdata` + /// file on Android.) + /// + /// # Errors + /// + /// This returns an error if the given path does not contain a valid + /// copy of the concatenated Time Zone Database. + /// + /// [Android Platform]: https://android.googlesource.com/platform/libcore/+/jb-mr2-release/luni/src/main/java/libcore/util/ZoneInfoDB.java + #[cfg(feature = "std")] + pub fn from_concatenated_path>( + path: P, + ) -> Result { + let path = path.as_ref(); + let zoneinfo = ZoneInfo::none(); + let concatenated = Concatenated::from_path(path)?; + let bundled = BundledZoneInfo::new(); + let inner = TimeZoneDatabaseInner { zoneinfo, concatenated, bundled }; + let db = TimeZoneDatabase { inner: Some(Arc::new(inner)) }; + if db.is_definitively_empty() { + warn!( + "could not find concatenated tzdata in file {path} \ + (and there is no bundled time zone database)", + path = path.display(), + ); + } + Ok(db) } /// Returns a [`TimeZone`] corresponding to the IANA time zone identifier @@ -308,14 +373,15 @@ impl TimeZoneDatabase { } })?; if let Some(tz) = inner.zoneinfo.get(name) { - trace!( - "found time zone {name} in system zoneinfo ({:?}) database", - inner.zoneinfo, - ); + trace!("found time zone `{name}` in {:?}", inner.zoneinfo); + return Ok(tz); + } + if let Some(tz) = inner.concatenated.get(name) { + trace!("found time zone `{name}` in {:?}", inner.concatenated); return Ok(tz); } if let Some(tz) = inner.bundled.get(name) { - trace!("found time zone {name} in bundled zoneinfo database"); + trace!("found time zone `{name}` in {:?}", inner.bundled); return Ok(tz); } Err(err!("failed to find time zone `{name}` in time zone 
database")) @@ -347,6 +413,7 @@ impl TimeZoneDatabase { }; }; let mut all = inner.zoneinfo.available(); + all.extend(inner.concatenated.available()); all.extend(inner.bundled.available()); all.sort(); all.dedup(); @@ -365,6 +432,7 @@ impl TimeZoneDatabase { pub fn reset(&self) { let Some(inner) = self.inner.as_deref() else { return }; inner.zoneinfo.reset(); + inner.concatenated.reset(); inner.bundled.reset(); } @@ -388,6 +456,7 @@ impl TimeZoneDatabase { pub fn is_definitively_empty(&self) -> bool { let Some(inner) = self.inner.as_deref() else { return true }; inner.zoneinfo.is_definitively_empty() + && inner.concatenated.is_definitively_empty() && inner.bundled.is_definitively_empty() } } @@ -398,11 +467,11 @@ impl core::fmt::Debug for TimeZoneDatabase { let Some(inner) = self.inner.as_deref() else { return write!(f, "unavailable)"); }; - write!(f, "system={:?}", inner.zoneinfo)?; - if !inner.bundled.is_definitively_empty() { - write!(f, " and bundled")?; - } - write!(f, ")")?; + write!( + f, + "{:?}, {:?}, {:?}", + inner.zoneinfo, inner.concatenated, inner.bundled + )?; Ok(()) } } diff --git a/src/tz/db/zoneinfo/disabled.rs b/src/tz/db/zoneinfo/disabled.rs index de501f36..a580feff 100644 --- a/src/tz/db/zoneinfo/disabled.rs +++ b/src/tz/db/zoneinfo/disabled.rs @@ -15,7 +15,7 @@ impl ZoneInfo { Err(crate::error::err!( "system tzdb unavailable: \ crate feature `tzdb-zoneinfo` is disabled, \ - tzdb lookup for {dir} has therefore failed", + opening tzdb at {dir} has therefore failed", dir = dir.display(), )) } diff --git a/src/tz/db/zoneinfo/enabled.rs b/src/tz/db/zoneinfo/enabled.rs index 49b7e6d7..7ef52fd1 100644 --- a/src/tz/db/zoneinfo/enabled.rs +++ b/src/tz/db/zoneinfo/enabled.rs @@ -1,5 +1,3 @@ -use core::cmp::Ordering; - use alloc::{ string::{String, ToString}, vec, @@ -18,7 +16,7 @@ use crate::{ error::{err, Error}, timestamp::Timestamp, tz::{tzif::is_possibly_tzif, TimeZone}, - util::{cache::Expiration, parse}, + util::{self, cache::Expiration, parse, 
utf8}, }; const DEFAULT_TTL: Duration = Duration::new(5 * 60, 0); @@ -36,10 +34,13 @@ impl ZoneInfo { pub(crate) fn from_env() -> ZoneInfo { if let Some(tzdir) = std::env::var_os("TZDIR") { let tzdir = PathBuf::from(tzdir); - debug!("opening zoneinfo database at TZDIR={}", tzdir.display()); + trace!("opening zoneinfo database at TZDIR={}", tzdir.display()); match ZoneInfo::from_dir(&tzdir) { Ok(db) => return db, Err(_err) => { + // This is a WARN because it represents a failure to + // satisfy a more direct request, which should be louder + // than failures related to auto-detection. warn!("failed opening TZDIR={}: {_err}", tzdir.display()); // fall through to attempt default directories } @@ -47,15 +48,15 @@ impl ZoneInfo { } for dir in ZONEINFO_DIRECTORIES { let tzdir = Path::new(dir); - debug!("opening zoneinfo database at {}", tzdir.display()); + trace!("opening zoneinfo database at {}", tzdir.display()); match ZoneInfo::from_dir(&tzdir) { Ok(db) => return db, Err(_err) => { - debug!("failed opening {}: {_err}", tzdir.display()); + trace!("failed opening {}: {_err}", tzdir.display()); } } } - warn!( + debug!( "could not find zoneinfo database at any of the following \ paths: {}", ZONEINFO_DIRECTORIES.join(", "), @@ -70,7 +71,7 @@ impl ZoneInfo { } /// Creates a "dummy" zoneinfo database in which all lookups fail. 
- fn none() -> ZoneInfo { + pub(crate) fn none() -> ZoneInfo { let dir = None; let names = None; let zones = RwLock::new(CachedZones::new()); @@ -216,7 +217,7 @@ impl CachedZones { fn get_zone_index(&self, query: &str) -> Result { self.zones.binary_search_by(|zone| { - cmp_ignore_ascii_case(zone.name.lower(), query) + utf8::cmp_ignore_ascii_case(zone.name.lower(), query) }) } @@ -251,7 +252,7 @@ impl CachedTimeZone { let tz = TimeZone::tzif(&info.inner.original, &data) .map_err(|e| e.path(path))?; let name = info.clone(); - let last_modified = last_modified_from_file(path, &file); + let last_modified = util::fs::last_modified_from_file(path, &file); let expiration = Expiration::after(ttl); Ok(CachedTimeZone { tz, name, expiration, last_modified }) } @@ -278,7 +279,7 @@ impl CachedTimeZone { // should always fail revalidation? I suppose a case could be made to // do the opposite: always pass revalidation. let Some(old_last_modified) = self.last_modified else { - info!( + trace!( "revalidation for {} failed because old last modified time \ is unavailable", info.inner.full.display(), @@ -286,9 +287,9 @@ impl CachedTimeZone { return false; }; let Some(new_last_modified) = - last_modified_from_path(&info.inner.full) + util::fs::last_modified_from_path(&info.inner.full) else { - info!( + trace!( "revalidation for {} failed because new last modified time \ is unavailable", info.inner.full.display(), @@ -297,7 +298,7 @@ impl CachedTimeZone { }; // We consider any change to invalidate cache. if old_last_modified != new_last_modified { - info!( + trace!( "revalidation for {} failed because last modified times \ do not match: old = {} != {} = new", info.inner.full.display(), @@ -426,7 +427,9 @@ impl ZoneInfoNamesInner { /// `None` is returned if one isn't found. 
fn get(&self, query: &str) -> Option { self.names - .binary_search_by(|n| cmp_ignore_ascii_case(&n.inner.lower, query)) + .binary_search_by(|n| { + utf8::cmp_ignore_ascii_case(&n.inner.lower, query) + }) .ok() .map(|i| self.names[i].clone()) } @@ -550,70 +553,6 @@ impl core::hash::Hash for ZoneInfoName { } } -/// Returns the last modified time for the given file path as a Jiff timestamp. -/// -/// If there was a problem accessing the last modified time or if it could not -/// fit in a Jiff timestamp, then a warning message is logged and `None` is -/// returned. -fn last_modified_from_path(path: &Path) -> Option { - let file = match File::open(path) { - Ok(file) => file, - Err(_err) => { - warn!( - "failed to open file to get last modified time {}: {_err}", - path.display(), - ); - return None; - } - }; - last_modified_from_file(path, &file) -} - -/// Returns the last modified time for the given file as a Jiff timestamp. -/// -/// If there was a problem accessing the last modified time or if it could not -/// fit in a Jiff timestamp, then a warning message is logged and `None` is -/// returned. -/// -/// The path given should be the path to the given file. It is used for -/// diagnostic purposes. 
-fn last_modified_from_file(_path: &Path, file: &File) -> Option { - let md = match file.metadata() { - Ok(md) => md, - Err(_err) => { - warn!( - "failed to get metadata (for last modified time) \ - for {}: {_err}", - _path.display(), - ); - return None; - } - }; - let systime = match md.modified() { - Ok(systime) => systime, - Err(_err) => { - warn!( - "failed to get last modified time for {}: {_err}", - _path.display() - ); - return None; - } - }; - let timestamp = match Timestamp::try_from(systime) { - Ok(timestamp) => timestamp, - Err(_err) => { - warn!( - "system time {systime:?} out of bounds \ - for Jiff timestamp for last modified time \ - from {}: {_err}", - _path.display(), - ); - return None; - } - }; - Some(timestamp) -} - /// Recursively walks the given directory and returns the names of all time /// zones found. /// @@ -639,7 +578,7 @@ fn walk(start: &Path) -> Result, Error> { let readdir = match dir.read_dir() { Ok(readdir) => readdir, Err(err) => { - info!( + trace!( "error when reading {} as a directory: {err}", dir.display() ); @@ -651,7 +590,7 @@ fn walk(start: &Path) -> Result, Error> { let dent = match result { Ok(dent) => dent, Err(err) => { - info!( + trace!( "error when reading directory entry from {}: {err}", dir.display() ); @@ -663,7 +602,7 @@ fn walk(start: &Path) -> Result, Error> { Ok(file_type) => file_type, Err(err) => { let path = dent.path(); - info!( + trace!( "error when reading file type from {}: {err}", path.display() ); @@ -687,14 +626,14 @@ fn walk(start: &Path) -> Result, Error> { let mut f = match File::open(&path) { Ok(f) => f, Err(err) => { - info!("failed to open {}: {err}", path.display()); + trace!("failed to open {}: {err}", path.display()); seterr(&path, Error::io(err)); continue; } }; let mut buf = [0; 4]; if let Err(err) = f.read_exact(&mut buf) { - info!( + trace!( "failed to read first 4 bytes of {}: {err}", path.display() ); @@ -716,7 +655,7 @@ fn walk(start: &Path) -> Result, Error> { let time_zone_name = match 
path.strip_prefix(start) { Ok(time_zone_name) => time_zone_name, Err(err) => { - info!( + trace!( "failed to extract time zone name from {} \ using {} as a base: {err}", path.display(), @@ -751,34 +690,6 @@ fn walk(start: &Path) -> Result, Error> { } } -/// Like std's `eq_ignore_ascii_case`, but returns a full `Ordering`. -fn cmp_ignore_ascii_case(s1: &str, s2: &str) -> Ordering { - // This function used to look like this: - // - // let it1 = s1.as_bytes().iter().map(|&b| b.to_ascii_lowercase()); - // let it2 = s2.as_bytes().iter().map(|&b| b.to_ascii_lowercase()); - // it1.cmp(it2) - // - // But the code below seems to do better in microbenchmarks. - // - // TODO: Experiment with a HashMap, probably using FNV. We can use it - // here since this code is only present when std is present. We will need - // a wrapper type that does ASCII case insensitive comparisons. - let (bytes1, bytes2) = (s1.as_bytes(), s2.as_bytes()); - let mut i = 0; - loop { - let b1 = bytes1.get(i).copied().map(|b| b.to_ascii_lowercase()); - let b2 = bytes2.get(i).copied().map(|b| b.to_ascii_lowercase()); - match (b1, b2) { - (None, None) => return Ordering::Equal, - (Some(_), None) => return Ordering::Greater, - (None, Some(_)) => return Ordering::Less, - (Some(b1), Some(b2)) if b1 == b2 => i += 1, - (Some(b1), Some(b2)) => return b1.cmp(&b2), - } - } -} - #[cfg(test)] mod tests { use super::*; diff --git a/src/tz/mod.rs b/src/tz/mod.rs index 0986813b..e41ca919 100644 --- a/src/tz/mod.rs +++ b/src/tz/mod.rs @@ -100,6 +100,8 @@ pub use self::{ offset::{Dst, Offset, OffsetArithmetic, OffsetConflict}, }; +#[cfg(feature = "alloc")] +mod concatenated; mod db; mod offset; #[cfg(feature = "alloc")] diff --git a/src/tz/system/android.rs b/src/tz/system/android.rs new file mode 100644 index 00000000..d75b1ee7 --- /dev/null +++ b/src/tz/system/android.rs @@ -0,0 +1,309 @@ +use std::{ffi::c_void, sync::OnceLock}; + +use alloc::vec::Vec; + +use core::{ffi::CStr, mem, ptr::NonNull}; + +use crate::{ + 
tz::{TimeZone, TimeZoneDatabase}, + util::escape, +}; + +/// Attempts to find the default "system" time zone. +pub(super) fn get(db: &TimeZoneDatabase) -> Option { + static PROPERTY_NAME: &str = "persist.sys.timezone\0"; + + static GETTER: OnceLock> = OnceLock::new(); + let Some(getter) = GETTER.get_or_init(|| PropertyGetter::new()) else { + // We don't emit any messages here because `PropertyGetter::new()` will + // have already done so. + return None; + }; + let tzname = getter.get(cstr(PROPERTY_NAME))?; + let Some(tzname) = core::str::from_utf8(&tzname).ok() else { + warn!( + "found `{PROPERTY_NAME}` name `{name}` on Android, \ + but it's not valid UTF-8", + name = escape::Bytes(&tzname), + ); + return None; + }; + let tz = match db.get(tzname) { + Ok(tz) => tz, + Err(_err) => { + warn!( + "found `{PROPERTY_NAME}` name `{tzname}` on Android, \ + but could not find it in time zone database {db:?}", + ); + return None; + } + }; + debug!( + "found system time zone `{tzname}` from Android property \ + `{PROPERTY_NAME}` and found entry for it in time zone \ + database {db:?}", + ); + Some(tz) +} + +/// Given a path to a system default TZif file, return its corresponding +/// time zone. +/// +/// This doesn't do any symlink shenanigans like in other Unix environments, +/// although we could consider doing that. I think probably this is very +/// unlikely to be used on Android, although it can be by setting `TZ`. +pub(super) fn read(db: &TimeZoneDatabase, path: &str) -> Option { + match super::read_unnamed_tzif_file(path) { + Ok(tz) => Some(tz), + Err(_err) => { + trace!("failed to read {path} as unnamed time zone: {_err}"); + None + } + } +} + +/// An abstraction for safely reading Android system properties. +/// +/// Initialization of this should only be done once. Initialization +/// dynamically loads `libc`. But it does not read any properties. Instead, +/// we permit the time zone property to be looked up repeatedly, in case it +/// changes. 
So, our `libc` library handle remains invariant, but the values +/// of properties can change over the lifetime of the process. +/// +/// This copies the technique used by the `android-system-properties` crate. +/// Namely, we use `dlopen` instead of hard-coding our linking requirements +/// since this is apparently more flexible. I guess in the past, the hard-coded +/// extern functions broke, but this technique doesn't. Or at least, it "fails +/// gracefully" since this won't result in build errors but just runtime +/// errors that result in no time zone being found (and thus will result in an +/// automatic fallback to UTC). +/// +/// Our implementation of this idea is perhaps a bit simpler than what +/// `android-system-properties` does though. We don't bother supporting >10 +/// year old versions of Android. (Although support for that could be added +/// if there was a real need.) Also, when this fails, we give better error +/// messages via `dlerror()` in the logs. +struct PropertyGetter { + /// A `dlopen` handle to `libc.so`. + /// + /// Note that since this is a bespoke property getter and we only ever + /// create a single instance in a process global static, this never gets + /// dropped. So we don't bother writing a `Drop` impl that calls `dlclose`. + libc: NonNull, + system_property_find: SystemPropertyFind, + system_property_read: SystemPropertyRead, +} + +// SAFETY: It is presumably safe to call functions derived from `dlsym` +// symbols from multiple threads simultaneously. And it is presumably safe +// to call Android's property getter APIs from multiple threads simultaneously. +// This isn't technically documented (as far as I can see), but it would be +// crazytown if this weren't true. +unsafe impl Send for PropertyGetter {} +// SAFETY: It is presumably safe to call functions derived from `dlsym` +// symbols from multiple threads simultaneously. 
And it is presumably safe +// to call Android's property getter APIs from multiple threads simultaneously. +// This isn't technically documented (as far as I can see), but it would be +// crazytown if this weren't true. +unsafe impl Sync for PropertyGetter {} + +impl PropertyGetter { + /// Creates a new property getter by `dlopen`'ing `libc.so`. + /// + /// If this fails for whatever reason, `None` is returned and WARN-level + /// log messages are emitted stating the reason for failure if it is known. + fn new() -> Option { + // SAFETY: OK because we provide a valid NUL terminated string. + let handle = unsafe { dlopen(cstr("libc.so\0").as_ptr(), 0) }; + let Some(libc) = NonNull::new(handle) else { + warn!( + "could not open libc.so via `dlopen`: {err}", + err = escape::Bytes(&dlerror_message()), + ); + return None; + }; + + // SAFETY: Our `SystemPropertyFind` type definition matches what is + // declared in `include/sys/system_properties.h` on Android. + let system_property_find: SystemPropertyFind = + unsafe { load_symbol(libc, cstr("__system_property_find\0"))? }; + + // SAFETY: Our `SystemPropertyRead` type definition matches what is + // declared in `include/sys/system_properties.h` on Android. + let system_property_read: SystemPropertyRead = unsafe { + load_symbol(libc, cstr("__system_property_read_callback\0"))? + }; + + Some(PropertyGetter { + libc, + system_property_find, + system_property_read, + }) + } + + /// Reads the given property name into the `Vec` returned. + /// + /// If the property doesn't exist, then `None` is returned and a WARN-level + /// log message is emitted explaining why. + fn get(&self, name: &CStr) -> Option> { + unsafe extern "C" fn callback( + buf: *mut c_void, + _name: *const i8, + value: *const i8, + _serial: u32, + ) { + let buf = buf.cast::>(); + // SAFETY: The implied contract of `__system_property_read_callback` + // is that `value` is a valid NUL terminated C string. 
+ let value = unsafe { CStr::from_ptr(value) }; + // SAFETY: We passed a valid `*mut Vec` to the callback, so + // casting it back to it is safe. + unsafe { + (*buf).extend_from_slice(value.to_bytes()); + } + } + + // SAFETY: `name` is a valid NUL terminated string and + // `system_property_find` is a valid function read from `dlsym` + // according to the declaration in `include/sys/system_properties.h`. + let prop_info = unsafe { (self.system_property_find)(name.as_ptr()) }; + if prop_info.is_null() { + warn!( + "Android property name `{name}` not found", + name = escape::Bytes(name.to_bytes()), + ); + return None; + } + + // N.B. A `prop_info` is an opaque pointer[1]... which means the + // implementation is probably allocating something in order to create + // it... right? But there's no API to free the pointer returned. And + // no other indication as to its lifetime. Once again, C is awful. + // + // [1]: https://android.googlesource.com/platform/bionic/+/master/libc/include/sys/system_properties.h#44 + + let mut buf = Vec::new(); + // SAFETY: `prop_info` is non-null (checked above), `buf` is a valid + // pointer to our `Vec` and `system_property_read` was loaded via `dlsym` + // according to the declaration in `include/sys/system_properties.h`. + unsafe { + let buf: *mut Vec = &mut buf; + (self.system_property_read)( + prop_info, + callback, + buf.cast::(), + ); + } + if buf.is_empty() { + warn!( + "reading Android property `{name}` resulted in empty value", + name = escape::Bytes(name.to_bytes()), + ); + return None; + } + Some(buf) + } +} + +/// Loads a function symbol, of type `F`, from the given `dlopen` handle. +/// +/// If this fails, then `handle` is closed and `None` is returned and a +/// WARN-level log message is emitted with an error message if possible. +/// +/// # Safety +/// +/// Callers must ensure that `F` is a function type that matches the ABI of +/// the `symbol` in `handle`. 
+unsafe fn load_symbol(handle: NonNull, symbol: &CStr) -> Option { + let sym = + // SAFETY: We know `handle` is non-null. + unsafe { dlsym(handle.as_ptr(), symbol.as_ptr()) }; + if sym.is_null() { + // SAFETY: We know `handle` is non-null. + let _ = unsafe { dlclose(handle.as_ptr()) }; + warn!( + "could not load `{symbol}` \ + symbol from `libc.so: {err}", + symbol = escape::Bytes(symbol.to_bytes()), + err = escape::Bytes(&dlerror_message()), + ); + return None; + } + // SAFETY: The safety obligation here is forwarded to the caller. They + // must guarantee that `F` is an appropriate type. + // declared in `include/sys/system_properties.h` on Android. + let function = unsafe { mem::transmute_copy::<*mut c_void, F>(&sym) }; + Some(function) +} + +/// Returns the error message given by `dlerror`. +/// +/// Callers should only use this when they expect an error to have occurred +/// with one of the `dl*` APIs. If `dlerror` returns a null pointer, then a +/// generic "unknown error" message is returned. +fn dlerror_message() -> Vec { + // SAFETY: I believe `dlerror()` is always safe to call. + let msg = unsafe { dlerror() }; + if msg.is_null() { + return b"unknown error".to_vec(); + } + // SAFETY: We've verified that `msg` is not null and the contract of + // `dlerror` says that it returns a NUL terminated C string. Moreover, + // we do not hold on to this string and instead copy it to the heap + // immediately. + // + // One wonders if `dlerror()` is actually sound in this context. While + // Jiff can guarantee that itself will call `dlerror()` in only one + // thread, Jiff can't prevent other parts of the process from calling + // `dlerror()`. In particular, `dlerror(3)` says: + // + // > The message returned by dlerror() may reside in a statically allocated + // > buffer that is overwritten by subsequent dlerror() calls. + // + // But no mention is made about whether this statically allocated buffer + // is written to in a thread safe way. 
Or whether the string returned to + // the caller can be unceremoniously overwritten by a simultaneously + // executing thread. + // + // However, in practice, this could be sound if the libc in use is doing + // something sensible like using a thread local. Then I believe this is + // fine. + // + // My goodness C is awful. If this turns out to be unsound, then since + // `dlerror()` isn't essential, we'll probably just have to stop using it. + // So dumb. + // + // Note that in theory the error path should never be exercised. + let cstr = unsafe { CStr::from_ptr(msg) }; + cstr.to_bytes().to_vec() +} + +/// Creates a C string "literal" and returns it as a `&'static CStr`. +/// +/// This panics if the string is not NUL terminated or has an interior NUL. +fn cstr(string: &'static str) -> &'static CStr { + CStr::from_bytes_with_nul(string.as_bytes()).unwrap() +} + +// We just define the FFI bindings ourselves instead of bringing in libc for +// this. We're only doing it for one platform, so it doesn't seem like a huge +// deal. But if this turns out to be a problem in practice, I'm fine accepting +// a target specific dependency on `libc` for Android. 
+extern "C" { + fn dlopen(filename: *const i8, flag: i32) -> *mut c_void; + fn dlclose(handle: *mut c_void) -> i32; + fn dlerror() -> *mut i8; + fn dlsym(handle: *mut c_void, symbol: *const i8) -> *mut c_void; +} + +// These types come from: +// https://android.googlesource.com/platform/bionic/+/master/libc/include/sys/system_properties.h +type PropInfo = c_void; +type SystemPropertyFind = unsafe extern "C" fn(*const i8) -> *const PropInfo; +type SystemPropertyRead = unsafe extern "C" fn( + *const PropInfo, + SystemPropertyReadCallback, + *mut c_void, +); +type SystemPropertyReadCallback = + unsafe extern "C" fn(*mut c_void, *const i8, *const i8, u32); diff --git a/src/tz/system/mod.rs b/src/tz/system/mod.rs index b744a190..4643b06b 100644 --- a/src/tz/system/mod.rs +++ b/src/tz/system/mod.rs @@ -8,10 +8,14 @@ use crate::{ util::{cache::Expiration, sync::Arc}, }; -#[cfg(unix)] +#[cfg(all(unix, not(target_os = "android")))] #[path = "unix.rs"] mod sys; +#[cfg(all(unix, target_os = "android"))] +#[path = "android.rs"] +mod sys; + #[cfg(windows)] #[path = "windows/mod.rs"] mod sys; diff --git a/src/tz/system/windows/mod.rs b/src/tz/system/windows/mod.rs index 87772534..ec23143e 100644 --- a/src/tz/system/windows/mod.rs +++ b/src/tz/system/windows/mod.rs @@ -10,10 +10,12 @@ use windows_sys::Win32::System::Time::{ use crate::{ error::{err, Error, ErrorContext}, tz::{TimeZone, TimeZoneDatabase}, + util::utf8, }; use self::windows_zones::WINDOWS_TO_IANA; +#[allow(dead_code)] // we don't currently read the version mod windows_zones; /// Attempts to find the default "system" time zone. 
@@ -74,7 +76,7 @@ pub(super) fn read(_db: &TimeZoneDatabase, path: &str) -> Option { fn windows_to_iana(tz_key_name: &str) -> Result<&'static str, Error> { let result = WINDOWS_TO_IANA.binary_search_by(|(win_name, _)| { - cmp_ignore_ascii_case(win_name, &tz_key_name) + utf8::cmp_ignore_ascii_case(win_name, &tz_key_name) }); let Ok(index) = result else { return Err(err!( @@ -126,13 +128,6 @@ fn nul_terminated_utf16_to_string( Ok(string) } -/// Like std's `eq_ignore_ascii_case`, but returns a full `Ordering`. -fn cmp_ignore_ascii_case(s1: &str, s2: &str) -> core::cmp::Ordering { - let it1 = s1.as_bytes().iter().map(|&b| b.to_ascii_lowercase()); - let it2 = s2.as_bytes().iter().map(|&b| b.to_ascii_lowercase()); - it1.cmp(it2) -} - #[cfg(test)] mod tests { use super::*; diff --git a/src/tz/testdata.rs b/src/tz/testdata.rs index 458a6eec..f3978a40 100644 --- a/src/tz/testdata.rs +++ b/src/tz/testdata.rs @@ -2,6 +2,12 @@ use alloc::string::ToString; use crate::tz::tzif::Tzif; +/// A concatenated list of TZif data with a header and an index block. +/// +/// This was extracted from an Android emulator file system via `adb`. +pub(crate) static ANDROID_CONCATENATED_TZIF: &'static [u8] = + include_bytes!("testdata/android/tzdata"); + /// A list of all TZif files in our testdata directory. /// /// Feel free to add more if there are other "interesting" cases. Note that @@ -13,7 +19,7 @@ use crate::tz::tzif::Tzif; /// * 2024-03-27: Initial set pulled from my local copy of `tzdata 2024a`. /// * 2024-07-05: Added `UTC`. /// * 2024-11-30: Added special Sydney time zone from RHEL8. 
-pub(crate) const TZIF_TEST_FILES: &[TzifTestFile] = &[ +pub(crate) static TZIF_TEST_FILES: &[TzifTestFile] = &[ TzifTestFile { name: "America/New_York", data: include_bytes!("testdata/america-new-york.tzif"), diff --git a/src/tz/testdata/android/tz_version b/src/tz/testdata/android/tz_version new file mode 100644 index 00000000..d4b02162 --- /dev/null +++ b/src/tz/testdata/android/tz_version @@ -0,0 +1 @@ +008.001|2024a|001 \ No newline at end of file diff --git a/src/tz/testdata/android/tzdata b/src/tz/testdata/android/tzdata new file mode 100644 index 00000000..7f21189a Binary files /dev/null and b/src/tz/testdata/android/tzdata differ diff --git a/src/util/fs.rs b/src/util/fs.rs new file mode 100644 index 00000000..f93c0ebc --- /dev/null +++ b/src/util/fs.rs @@ -0,0 +1,70 @@ +use std::{fs::File, path::Path}; + +use crate::Timestamp; + +/// Returns the last modified time for the given file path as a Jiff timestamp. +/// +/// If there was a problem accessing the last modified time or if it could not +/// fit in a Jiff timestamp, then a warning message is logged and `None` is +/// returned. +pub(crate) fn last_modified_from_path(path: &Path) -> Option<Timestamp> { + let file = match File::open(path) { + Ok(file) => file, + Err(_err) => { + warn!( + "failed to open file to get last modified time {}: {_err}", + path.display(), + ); + return None; + } + }; + last_modified_from_file(path, &file) +} + +/// Returns the last modified time for the given file as a Jiff timestamp. +/// +/// If there was a problem accessing the last modified time or if it could not +/// fit in a Jiff timestamp, then a warning message is logged and `None` is +/// returned. +/// +/// The path given should be the path to the given file. It is used for +/// diagnostic purposes. 
+pub(crate) fn last_modified_from_file( + _path: &Path, + file: &File, +) -> Option<Timestamp> { + let md = match file.metadata() { + Ok(md) => md, + Err(_err) => { + warn!( + "failed to get metadata (for last modified time) \ + for {}: {_err}", + _path.display(), + ); + return None; + } + }; + let systime = match md.modified() { + Ok(systime) => systime, + Err(_err) => { + warn!( + "failed to get last modified time for {}: {_err}", + _path.display() + ); + return None; + } + }; + let timestamp = match Timestamp::try_from(systime) { + Ok(timestamp) => timestamp, + Err(_err) => { + warn!( + "system time {systime:?} out of bounds \ + for Jiff timestamp for last modified time \ + from {}: {_err}", + _path.display(), + ); + return None; + } + }; + Some(timestamp) +} diff --git a/src/util/mod.rs b/src/util/mod.rs index 96aedff8..d3c160a0 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -5,6 +5,8 @@ pub(crate) mod cache; pub(crate) mod common; pub(crate) mod crc32; pub(crate) mod escape; +#[cfg(feature = "std")] +pub(crate) mod fs; #[cfg(not(feature = "std"))] pub(crate) mod libm; pub(crate) mod parse; diff --git a/src/util/utf8.rs b/src/util/utf8.rs index 25cb3337..b1dad7bc 100644 --- a/src/util/utf8.rs +++ b/src/util/utf8.rs @@ -1,3 +1,5 @@ +use core::cmp::Ordering; + /// Decodes the next UTF-8 encoded codepoint from the given byte slice. /// /// If no valid encoding of a codepoint exists at the beginning of the given @@ -26,6 +28,37 @@ pub(crate) fn decode(bytes: &[u8]) -> Option> { } } +/// Like std's `eq_ignore_ascii_case`, but returns a full `Ordering`. +#[inline] +pub(crate) fn cmp_ignore_ascii_case(s1: &str, s2: &str) -> Ordering { + cmp_ignore_ascii_case_bytes(s1.as_bytes(), s2.as_bytes()) +} + +/// Like std's `eq_ignore_ascii_case`, but returns a full `Ordering` on +/// `&[u8]`. 
+#[inline] +pub(crate) fn cmp_ignore_ascii_case_bytes(s1: &[u8], s2: &[u8]) -> Ordering { + // This function used to look like this: + // + // let it1 = s1.iter().map(|&b| b.to_ascii_lowercase()); + // let it2 = s2.iter().map(|&b| b.to_ascii_lowercase()); + // it1.cmp(it2) + // + // But the code below seems to do better in microbenchmarks. + let mut i = 0; + loop { + let b1 = s1.get(i).copied().map(|b| b.to_ascii_lowercase()); + let b2 = s2.get(i).copied().map(|b| b.to_ascii_lowercase()); + match (b1, b2) { + (None, None) => return Ordering::Equal, + (Some(_), None) => return Ordering::Greater, + (None, Some(_)) => return Ordering::Less, + (Some(b1), Some(b2)) if b1 == b2 => i += 1, + (Some(b1), Some(b2)) => return b1.cmp(&b2), + } + } +} + /// Given a UTF-8 leading byte, this returns the total number of code units /// in the following encoded codepoint. ///