Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed ANTLRInputStream and ANTLRFileStream #3113

Merged
merged 11 commits into from
Mar 10, 2021
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,8 @@ protected void writeParserTestFile(String parserName, String lexerName,
+ "\n"
+ "\n"
+ "int main(int argc, const char* argv[]) {\n"
+ " ANTLRFileStream input(argv[1]);\n"
+ " ANTLRFileStream input;\n"
+ " input.loadFromFile(argv[1]);\n"
+ " <lexerName> lexer(&input);\n"
+ " CommonTokenStream tokens(&lexer);\n"
+ "<createParser>"
Expand Down Expand Up @@ -436,7 +437,8 @@ protected void writeLexerTestFile(String lexerName, boolean showDFA) {
+ "using namespace antlr4;\n"
+ "\n"
+ "int main(int argc, const char* argv[]) {\n"
+ " ANTLRFileStream input(argv[1]);\n"
+ " ANTLRFileStream input;\n"
+ " input.loadFromFile(argv[1]);\n"
+ " <lexerName> lexer(&input);\n"
+ " CommonTokenStream tokens(&lexer);\n"
+ " tokens.fill();\n"
Expand Down
46 changes: 31 additions & 15 deletions runtime/Cpp/runtime/antlrcpp.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
archiveVersion = 1;
classes = {
};
objectVersion = 46;
objectVersion = 54;
objects = {

/* Begin PBXBuildFile section */
Expand Down Expand Up @@ -2226,7 +2226,7 @@
37D727A21867AF1E007B6D10 /* Project object */ = {
isa = PBXProject;
attributes = {
LastUpgradeCheck = 1030;
LastUpgradeCheck = 1240;
ORGANIZATIONNAME = ANTLR;
TargetAttributes = {
270C67EF1CDB4F1E00116E17 = {
Expand All @@ -2238,7 +2238,7 @@
};
};
buildConfigurationList = 37D727A51867AF1E007B6D10 /* Build configuration list for PBXProject "antlrcpp" */;
compatibilityVersion = "Xcode 3.2";
compatibilityVersion = "Xcode 12.0";
developmentRegion = en;
hasScannedForEncodings = 0;
knownRegions = (
Expand Down Expand Up @@ -2751,8 +2751,12 @@
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
INFOPLIST_FILE = "antlrcpp-ios/Info.plist";
INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks";
IPHONEOS_DEPLOYMENT_TARGET = 9.3;
LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks @loader_path/Frameworks";
IPHONEOS_DEPLOYMENT_TARGET = 12.0;
LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)",
"@executable_path/Frameworks",
"@loader_path/Frameworks",
);
MTL_ENABLE_DEBUG_INFO = YES;
PRODUCT_BUNDLE_IDENTIFIER = "org.antlr.v4.runtime.antlrcpp-ios";
PRODUCT_NAME = "$(TARGET_NAME)";
Expand Down Expand Up @@ -2782,8 +2786,12 @@
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
INFOPLIST_FILE = "antlrcpp-ios/Info.plist";
INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks";
IPHONEOS_DEPLOYMENT_TARGET = 9.3;
LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks @loader_path/Frameworks";
IPHONEOS_DEPLOYMENT_TARGET = 12.0;
LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)",
"@executable_path/Frameworks",
"@loader_path/Frameworks",
);
MTL_ENABLE_DEBUG_INFO = NO;
PRODUCT_BUNDLE_IDENTIFIER = "org.antlr.v4.runtime.antlrcpp-ios";
PRODUCT_NAME = "$(TARGET_NAME)";
Expand Down Expand Up @@ -2841,8 +2849,7 @@
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
CLANG_ANALYZER_LOCALIZABILITY_NONLOCALIZED = YES;
CLANG_CXX_LANGUAGE_STANDARD = "c++0x";
CLANG_CXX_LIBRARY = "libc++";
CLANG_CXX_LANGUAGE_STANDARD = "c++17";
CLANG_ENABLE_OBJC_ARC = YES;
CLANG_WARN_ASSIGN_ENUM = YES;
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
Expand All @@ -2857,6 +2864,7 @@
CLANG_WARN_INT_CONVERSION = YES;
CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
CLANG_WARN_STRICT_PROTOTYPES = YES;
CLANG_WARN_SUSPICIOUS_IMPLICIT_CONVERSION = YES;
Expand Down Expand Up @@ -2889,8 +2897,12 @@
GCC_WARN_UNUSED_LABEL = YES;
GCC_WARN_UNUSED_PARAMETER = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
HEADER_SEARCH_PATHS = "src/ thirdparty/utfcpp/source/ thirdparty/utfcpp/source/utf8/";
MACOSX_DEPLOYMENT_TARGET = 10.9;
HEADER_SEARCH_PATHS = (
src/,
thirdparty/utfcpp/source/,
thirdparty/utfcpp/source/utf8/,
);
MACOSX_DEPLOYMENT_TARGET = 11.1;
ONLY_ACTIVE_ARCH = YES;
SDKROOT = macosx;
};
Expand All @@ -2901,8 +2913,7 @@
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
CLANG_ANALYZER_LOCALIZABILITY_NONLOCALIZED = YES;
CLANG_CXX_LANGUAGE_STANDARD = "c++0x";
CLANG_CXX_LIBRARY = "libc++";
CLANG_CXX_LANGUAGE_STANDARD = "c++17";
CLANG_ENABLE_OBJC_ARC = YES;
CLANG_WARN_ASSIGN_ENUM = YES;
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
Expand All @@ -2917,6 +2928,7 @@
CLANG_WARN_INT_CONVERSION = YES;
CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
CLANG_WARN_STRICT_PROTOTYPES = YES;
CLANG_WARN_SUSPICIOUS_IMPLICIT_CONVERSION = YES;
Expand Down Expand Up @@ -2945,8 +2957,12 @@
GCC_WARN_UNUSED_LABEL = YES;
GCC_WARN_UNUSED_PARAMETER = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
HEADER_SEARCH_PATHS = "src/ thirdparty/utfcpp/source/ thirdparty/utfcpp/source/utf8/";
MACOSX_DEPLOYMENT_TARGET = 10.9;
HEADER_SEARCH_PATHS = (
src/,
thirdparty/utfcpp/source/,
thirdparty/utfcpp/source/utf8/,
);
MACOSX_DEPLOYMENT_TARGET = 11.1;
SDKROOT = macosx;
};
name = Release;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<Scheme
LastUpgradeVersion = "1030"
LastUpgradeVersion = "1240"
version = "1.3">
<BuildAction
parallelizeBuildables = "YES"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<Scheme
LastUpgradeVersion = "1030"
LastUpgradeVersion = "1240"
version = "1.3">
<BuildAction
parallelizeBuildables = "YES"
Expand Down Expand Up @@ -29,8 +29,6 @@
shouldUseLaunchSchemeArgsEnv = "YES">
<Testables>
</Testables>
<AdditionalOptions>
</AdditionalOptions>
</TestAction>
<LaunchAction
buildConfiguration = "Debug"
Expand All @@ -51,8 +49,6 @@
ReferencedContainer = "container:antlrcpp.xcodeproj">
</BuildableReference>
</MacroExpansion>
<AdditionalOptions>
</AdditionalOptions>
</LaunchAction>
<ProfileAction
buildConfiguration = "Release"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<Scheme
LastUpgradeVersion = "1030"
LastUpgradeVersion = "1240"
version = "1.3">
<BuildAction
parallelizeBuildables = "YES"
Expand Down Expand Up @@ -29,8 +29,6 @@
shouldUseLaunchSchemeArgsEnv = "YES">
<Testables>
</Testables>
<AdditionalOptions>
</AdditionalOptions>
</TestAction>
<LaunchAction
buildConfiguration = "Debug"
Expand All @@ -51,8 +49,6 @@
ReferencedContainer = "container:antlrcpp.xcodeproj">
</BuildableReference>
</MacroExpansion>
<AdditionalOptions>
</AdditionalOptions>
</LaunchAction>
<ProfileAction
buildConfiguration = "Release"
Expand Down
7 changes: 1 addition & 6 deletions runtime/Cpp/runtime/src/ANTLRFileStream.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
Expand All @@ -9,11 +9,6 @@

using namespace antlr4;

ANTLRFileStream::ANTLRFileStream(const std::string &fileName) {
_fileName = fileName;
loadFromFile(fileName);
}

void ANTLRFileStream::loadFromFile(const std::string &fileName) {
_fileName = fileName;
if (_fileName.empty()) {
Expand Down
14 changes: 8 additions & 6 deletions runtime/Cpp/runtime/src/ANTLRFileStream.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
Expand All @@ -13,15 +13,17 @@ namespace antlr4 {
/// when you construct the object (or call load()).
// TODO: this class needs testing.
class ANTLR4CPP_PUBLIC ANTLRFileStream : public ANTLRInputStream {
protected:
std::string _fileName; // UTF-8 encoded file name.

public:
// Assumes a file name encoded in UTF-8 and file content in the same encoding (with or w/o BOM).
ANTLRFileStream(const std::string &fileName);
ANTLRFileStream(const std::string &) = delete;
ANTLRFileStream(const char *data, size_t length) = delete;
ANTLRFileStream(std::istream &stream) = delete;

// Assumes a file name encoded in UTF-8 and file content in the same encoding (with or w/o BOM).
virtual void loadFromFile(const std::string &fileName);
virtual std::string getSourceName() const override;

private:
std::string _fileName; // UTF-8 encoded file name.
};

} // namespace antlr4
49 changes: 23 additions & 26 deletions runtime/Cpp/runtime/src/ANTLRInputStream.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
Expand All @@ -17,44 +17,41 @@ using namespace antlrcpp;

using misc::Interval;

ANTLRInputStream::ANTLRInputStream() {
InitializeInstanceFields();
}

#if __cplusplus >= 201703L
ANTLRInputStream::ANTLRInputStream(std::string_view input) {
#else
ANTLRInputStream::ANTLRInputStream(const std::string &input) {
ANTLRInputStream::ANTLRInputStream(const std::string_view &input): ANTLRInputStream() {
load(input.data(), input.length());
}
#endif
InitializeInstanceFields();
load(input);

ANTLRInputStream::ANTLRInputStream(const std::string &input): ANTLRInputStream() {
load(input.data(), input.size());
}

ANTLRInputStream::ANTLRInputStream(const char data_[], size_t numberOfActualCharsInArray)
: ANTLRInputStream(std::string(data_, numberOfActualCharsInArray)) {
ANTLRInputStream::ANTLRInputStream(const char *data, size_t length) {
load(data, length);
}

ANTLRInputStream::ANTLRInputStream(std::istream &stream) {
InitializeInstanceFields();
ANTLRInputStream::ANTLRInputStream(std::istream &stream): ANTLRInputStream() {
load(stream);
}

#if __cplusplus >= 201703L
void ANTLRInputStream::load(std::string_view input) {
// Remove the UTF-8 BOM if present.
constexpr std::string_view bom = "\xef\xbb\xbf";
if (input.compare(0, 3, bom) == 0)
input.remove_prefix(3);
_data = antlrcpp::utf8_to_utf32(input.data(), input.data() + input.size());
p = 0;
}
#else
void ANTLRInputStream::load(const std::string &input) {
load(input.data(), input.size());
}

void ANTLRInputStream::load(const char *data, size_t length) {
// Remove the UTF-8 BOM if present.
const char bom[4] = "\xef\xbb\xbf";
if (input.compare(0, 3, bom, 3) == 0)
_data = antlrcpp::utf8_to_utf32(input.data() + 3, input.data() + input.size());
const char *bom = "\xef\xbb\xbf";
if (length > 3 && strncmp(data, bom, 3) == 0)
Copy link
Contributor

@xTachyon xTachyon Mar 10, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe I'm just nitpicking at this point, but wouldn't an empty bom be a valid empty string that could be valid for some grammars? So the check should be length >= 3, to accept the input and skip the BOM if it's just the BOM. Or maybe I don't know my Unicode. Sorry for all the disturbance.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No worries, a good code review is often inconvenient.

The approach there is this: if there's enough input to hold a possible BOM, check for it; if not, just go ahead and load what you've got. So even an empty string is "loaded" correctly in the else branch.

I'm assuming here that by "empty bom" you actually mean an empty input string.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here's what I mean: if only "\xef\xbb\xbf" is passed to the function, length will be 3 and the else branch will be taken. Then, at least when used with the utfcpp conversion functions, it will throw utf8::invalid_code_point because it can't parse the BOM.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I also thought about this case and concluded it was not worth considering, as it would mean no useful input was given. However, there could be a workflow where a BOM is always attached automatically, regardless of what input was given, and empty input is often valid input. So it makes sense to strip off the BOM and deal with the empty input anyway. I have therefore changed the check.

_data = antlrcpp::utf8_to_utf32(data + 3, data + length);
else
_data = antlrcpp::utf8_to_utf32(input.data(), input.data() + input.size());
_data = antlrcpp::utf8_to_utf32(data, data + length);
p = 0;
}
#endif

void ANTLRInputStream::load(std::istream &stream) {
if (!stream.good() || stream.eof()) // No fail, bad or EOF.
Expand All @@ -63,7 +60,7 @@ void ANTLRInputStream::load(std::istream &stream) {
_data.clear();

std::string s((std::istreambuf_iterator<char>(stream)), std::istreambuf_iterator<char>());
load(s);
load(s.data(), s.length());
}

void ANTLRInputStream::reset() {
Expand Down
17 changes: 8 additions & 9 deletions runtime/Cpp/runtime/src/ANTLRInputStream.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
Expand All @@ -25,19 +25,18 @@ namespace antlr4 {
/// What is name or source of this char stream?
std::string name;

ANTLRInputStream();

#if __cplusplus >= 201703L
ANTLRInputStream(std::string_view input = "");
#else
ANTLRInputStream(const std::string &input = "");
ANTLRInputStream(const std::string_view &input);
#endif
ANTLRInputStream(const char data_[], size_t numberOfActualCharsInArray);

ANTLRInputStream(const std::string &input);
ANTLRInputStream(const char *data, size_t length);
ANTLRInputStream(std::istream &stream);

#if __cplusplus >= 201703L
virtual void load(std::string_view input);
#else
virtual void load(const std::string &input);
#endif
virtual void load(const char *data, size_t length);
virtual void load(std::istream &stream);

/// Reset the stream so that it's in the same state it was
Expand Down
6 changes: 5 additions & 1 deletion runtime/Cpp/runtime/src/atn/ParserATNSimulator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1348,7 +1348,9 @@ Parser* ParserATNSimulator::getParser() {
return parser;
}

#pragma warning (disable:4996) // 'getenv': This function or variable may be unsafe. Consider using _dupenv_s instead.
#ifdef _MSC_VER
#pragma warning (disable:4996) // 'getenv': This function or variable may be unsafe. Consider using _dupenv_s instead.
#endif

bool ParserATNSimulator::getLrLoopSetting() {
char *var = std::getenv("TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT");
Expand All @@ -1358,7 +1360,9 @@ bool ParserATNSimulator::getLrLoopSetting() {
return value == "true" || value == "1";
}

#ifdef _MSC_VER
#pragma warning (default:4996)
#endif

void ParserATNSimulator::InitializeInstanceFields() {
_mode = PredictionMode::LL;
Expand Down