diff options
| author | Peter Klausler <pklausler@nvidia.com> | 2025-09-23 15:45:35 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-09-23 15:45:35 -0700 |
| commit | ea5262ffc95a767cb5a4b0ea6f04857e4aa00c46 (patch) | |
| tree | 3ebc2497683baa29974a63ef42fd52a8e5bb9176 /flang-rt | |
| parent | 286f45189bbb06a4e6a3fc1c3f92bfce486f3ebe (diff) | |
[flang][runtime] Use dumber but faster check for list-directed repetition (#159867)
When scanning list-directed input for nulls and repetition counts, the
current library depends on having each record be prescanned for the
presence of asterisk characters. It turns out that the overhead of
calling memchr(...,'*',...) on each record doesn't pay off, especially
on systems without SIMD-vectorized memchr implementations -- and even on
systems that do have one, it's faster (about 10%) to just scan ahead for asterisks when
decimal digits are encountered. Only when an asterisk is present, which
is not common, should we then bother to convert the digits to their
integer value.
Diffstat (limited to 'flang-rt')
| -rw-r--r-- | flang-rt/include/flang-rt/runtime/io-stmt.h | 26 | ||||
| -rw-r--r-- | flang-rt/lib/runtime/io-stmt.cpp | 4 |
2 files changed, 18 insertions, 12 deletions
diff --git a/flang-rt/include/flang-rt/runtime/io-stmt.h b/flang-rt/include/flang-rt/runtime/io-stmt.h index 03b6efd65ddf..1c4f06c0a708 100644 --- a/flang-rt/include/flang-rt/runtime/io-stmt.h +++ b/flang-rt/include/flang-rt/runtime/io-stmt.h @@ -149,9 +149,7 @@ public: : connection_{connection} {} RT_API_ATTRS FastAsciiField( ConnectionState &connection, const char *start, std::size_t bytes) - : connection_{connection}, at_{start}, limit_{start + bytes} { - CheckForAsterisk(); - } + : connection_{connection}, at_{start}, limit_{start + bytes} {} RT_API_ATTRS ConnectionState &connection() { return connection_; } RT_API_ATTRS std::size_t got() const { return got_; } @@ -168,7 +166,6 @@ public: if (at_) { if (std::size_t bytes{io.GetNextInputBytes(at_)}) { limit_ = at_ + bytes; - CheckForAsterisk(); } else { at_ = limit_ = nullptr; } @@ -181,19 +178,28 @@ public: } connection_.HandleRelativePosition(bytes); } - RT_API_ATTRS bool MightHaveAsterisk() const { return !at_ || hasAsterisk_; } - private: - RT_API_ATTRS void CheckForAsterisk() { - hasAsterisk_ = at_ && at_ < limit_ && - runtime::memchr(at_, '*', limit_ - at_) != nullptr; + // Could there be a list-directed repetition count here? 
+ RT_API_ATTRS bool MightBeRepetitionCount() const { + if (!at_) { + return true; // must use slow path for internal KIND/=1 input + } else { + if (const char *p{at_}; *p >= '0' && *p <= '9') { + while (++p < limit_) { + if (*p < '0' || *p > '9') { + return *p == '*'; + } + } + } + return false; + } } + private: ConnectionState &connection_; const char *at_{nullptr}; const char *limit_{nullptr}; std::size_t got_{0}; // for READ(..., SIZE=) - bool hasAsterisk_{false}; }; RT_API_ATTRS FastAsciiField GetUpcomingFastAsciiField(); diff --git a/flang-rt/lib/runtime/io-stmt.cpp b/flang-rt/lib/runtime/io-stmt.cpp index e260c0ca7511..7bcba5fe71ee 100644 --- a/flang-rt/lib/runtime/io-stmt.cpp +++ b/flang-rt/lib/runtime/io-stmt.cpp @@ -905,8 +905,8 @@ ListDirectedStatementState<Direction::Input>::GetNextDataEdit( if (imaginaryPart_) { // can't repeat components return edit; } - if (*ch >= '0' && *ch <= '9' && fastField.MightHaveAsterisk()) { - // look for "r*" repetition count + if (*ch >= '0' && *ch <= '9' && fastField.MightBeRepetitionCount()) { + // There's decimal digits followed by '*'. auto start{fastField.connection().positionInRecord}; int r{0}; do { |
