From e3fe9040ab716fea1f8dc4475353106d99772952 Mon Sep 17 00:00:00 2001 From: Michael Schmidt Date: Wed, 13 May 2020 11:59:18 +0200 Subject: [PATCH] C++: Improved class name detection (#2348) --- components/prism-cpp.js | 78 +++++++++---- components/prism-cpp.min.js | 2 +- tests/languages/cpp/base-clause_feature.test | 117 +++++++++++++++++++ tests/languages/cpp/class-name_feature.test | 39 ++++++- tests/languages/cpp/issue2347.test | 72 ++++++++++++ 5 files changed, 286 insertions(+), 22 deletions(-) create mode 100644 tests/languages/cpp/base-clause_feature.test create mode 100644 tests/languages/cpp/issue2347.test diff --git a/components/prism-cpp.js b/components/prism-cpp.js index 8903840101..0de2ed5b5f 100644 --- a/components/prism-cpp.js +++ b/components/prism-cpp.js @@ -1,21 +1,59 @@ -Prism.languages.cpp = Prism.languages.extend('c', { - 'class-name': { - pattern: /(\b(?:class|enum|struct)\s+)(?!class|enum|struct)\w+/, - lookbehind: true - }, - 'keyword': /\b(?:alignas|alignof|asm|auto|bool|break|case|catch|char|char8_t|char16_t|char32_t|class|compl|concept|const|consteval|constexpr|constinit|const_cast|continue|co_await|co_return|co_yield|decltype|default|delete|do|double|dynamic_cast|else|enum|explicit|export|extern|float|for|friend|goto|if|inline|int|int8_t|int16_t|int32_t|int64_t|uint8_t|uint16_t|uint32_t|uint64_t|long|mutable|namespace|new|noexcept|nullptr|operator|private|protected|public|register|reinterpret_cast|requires|return|short|signed|sizeof|static|static_assert|static_cast|struct|switch|template|this|thread_local|throw|try|typedef|typeid|typename|union|unsigned|using|virtual|void|volatile|wchar_t|while)\b/, - 'number': { - pattern: /(?:\b0b[01']+|\b0x(?:[\da-f']+\.?[\da-f']*|\.[\da-f']+)(?:p[+-]?[\d']+)?|(?:\b[\d']+\.?[\d']*|\B\.[\d']+)(?:e[+-]?[\d']+)?)[ful]*/i, - greedy: true - }, - 'operator': />>=?|<<=?|->|([-+&|:])\1|[?:~]|[-+*/%&|^!=<>]=?|\b(?:and|and_eq|bitand|bitor|not|not_eq|or|or_eq|xor|xor_eq)\b/, - 'boolean': /\b(?:true|false)\b/ 
-}); +(function (Prism) { -Prism.languages.insertBefore('cpp', 'string', { - 'raw-string': { - pattern: /R"([^()\\ ]{0,16})\([\s\S]*?\)\1"/, - alias: 'string', - greedy: true - } -}); + var keyword = /\b(?:alignas|alignof|asm|auto|bool|break|case|catch|char|char8_t|char16_t|char32_t|class|compl|concept|const|consteval|constexpr|constinit|const_cast|continue|co_await|co_return|co_yield|decltype|default|delete|do|double|dynamic_cast|else|enum|explicit|export|extern|float|for|friend|goto|if|inline|int|int8_t|int16_t|int32_t|int64_t|uint8_t|uint16_t|uint32_t|uint64_t|long|mutable|namespace|new|noexcept|nullptr|operator|private|protected|public|register|reinterpret_cast|requires|return|short|signed|sizeof|static|static_assert|static_cast|struct|switch|template|this|thread_local|throw|try|typedef|typeid|typename|union|unsigned|using|virtual|void|volatile|wchar_t|while)\b/; + + Prism.languages.cpp = Prism.languages.extend('c', { + 'class-name': [ + { + pattern: RegExp(/(\b(?:class|enum|struct|typename)\s+)(?!<keyword>)\w+/.source + .replace(/<keyword>/g, function () { return keyword.source; })), + lookbehind: true + }, + // This is intended to capture the class name of method implementations like: + // void foo::bar() const {} + // However! The `foo` in the above example could also be a namespace, so we only capture the class name if + // it starts with an uppercase letter. This approximation should give decent results. + /\b[A-Z]\w*(?=\s*::\s*\w+\s*\()/, + // This will capture the class name before destructors like: + // Foo::~Foo() {} + /\b[A-Z_]\w*(?=\s*::\s*~\w+\s*\()/i, + { + // This also intends to capture the class name of method implementations but here the class has template + // parameters, so it can't be a namespace (until C++ adds generic namespaces). 
+ pattern: /\w+(?=\s*<(?:[^<>]|<(?:[^<>]|<[^<>]*>)*>)*>\s*::\s*\w+\s*\()/, + inside: null // see below + } + ], + 'keyword': keyword, + 'number': { + pattern: /(?:\b0b[01']+|\b0x(?:[\da-f']+\.?[\da-f']*|\.[\da-f']+)(?:p[+-]?[\d']+)?|(?:\b[\d']+\.?[\d']*|\B\.[\d']+)(?:e[+-]?[\d']+)?)[ful]*/i, + greedy: true + }, + 'operator': />>=?|<<=?|->|([-+&|:])\1|[?:~]|[-+*/%&|^!=<>]=?|\b(?:and|and_eq|bitand|bitor|not|not_eq|or|or_eq|xor|xor_eq)\b/, + 'boolean': /\b(?:true|false)\b/ + }); + + Prism.languages.insertBefore('cpp', 'string', { + 'raw-string': { + pattern: /R"([^()\\ ]{0,16})\([\s\S]*?\)\1"/, + alias: 'string', + greedy: true + } + }); + + Prism.languages.insertBefore('cpp', 'class-name', { + // the base clause is an optional list of parent classes + // https://en.cppreference.com/w/cpp/language/class + 'base-clause': { + pattern: /(\b(?:class|struct)\s+\w+\s*:\s*)(?:[^;{}"'])+?(?=\s*[;{])/, + lookbehind: true, + greedy: true, + inside: Prism.languages.extend('cpp', {}) + } + }); + Prism.languages.insertBefore('inside', 'operator', { + // All untokenized words that are not namespaces should be class names + 'class-name': /\b[a-z_]\w*\b(?!\s*::)/i + }, Prism.languages.cpp['base-clause']); + +}(Prism)); diff --git a/components/prism-cpp.min.js b/components/prism-cpp.min.js index 2f0af4f881..f31882471c 100644 --- a/components/prism-cpp.min.js +++ b/components/prism-cpp.min.js @@ -1 +1 @@ 
-Prism.languages.cpp=Prism.languages.extend("c",{"class-name":{pattern:/(\b(?:class|enum|struct)\s+)(?!class|enum|struct)\w+/,lookbehind:!0},keyword:/\b(?:alignas|alignof|asm|auto|bool|break|case|catch|char|char8_t|char16_t|char32_t|class|compl|concept|const|consteval|constexpr|constinit|const_cast|continue|co_await|co_return|co_yield|decltype|default|delete|do|double|dynamic_cast|else|enum|explicit|export|extern|float|for|friend|goto|if|inline|int|int8_t|int16_t|int32_t|int64_t|uint8_t|uint16_t|uint32_t|uint64_t|long|mutable|namespace|new|noexcept|nullptr|operator|private|protected|public|register|reinterpret_cast|requires|return|short|signed|sizeof|static|static_assert|static_cast|struct|switch|template|this|thread_local|throw|try|typedef|typeid|typename|union|unsigned|using|virtual|void|volatile|wchar_t|while)\b/,number:{pattern:/(?:\b0b[01']+|\b0x(?:[\da-f']+\.?[\da-f']*|\.[\da-f']+)(?:p[+-]?[\d']+)?|(?:\b[\d']+\.?[\d']*|\B\.[\d']+)(?:e[+-]?[\d']+)?)[ful]*/i,greedy:!0},operator:/>>=?|<<=?|->|([-+&|:])\1|[?:~]|[-+*/%&|^!=<>]=?|\b(?:and|and_eq|bitand|bitor|not|not_eq|or|or_eq|xor|xor_eq)\b/,boolean:/\b(?:true|false)\b/}),Prism.languages.insertBefore("cpp","string",{"raw-string":{pattern:/R"([^()\\ ]{0,16})\([\s\S]*?\)\1"/,alias:"string",greedy:!0}}); \ No newline at end of file +!function(e){var 
t=/\b(?:alignas|alignof|asm|auto|bool|break|case|catch|char|char8_t|char16_t|char32_t|class|compl|concept|const|consteval|constexpr|constinit|const_cast|continue|co_await|co_return|co_yield|decltype|default|delete|do|double|dynamic_cast|else|enum|explicit|export|extern|float|for|friend|goto|if|inline|int|int8_t|int16_t|int32_t|int64_t|uint8_t|uint16_t|uint32_t|uint64_t|long|mutable|namespace|new|noexcept|nullptr|operator|private|protected|public|register|reinterpret_cast|requires|return|short|signed|sizeof|static|static_assert|static_cast|struct|switch|template|this|thread_local|throw|try|typedef|typeid|typename|union|unsigned|using|virtual|void|volatile|wchar_t|while)\b/;e.languages.cpp=e.languages.extend("c",{"class-name":[{pattern:RegExp("(\\b(?:class|enum|struct|typename)\\s+)(?!<keyword>)\\w+".replace(/<keyword>/g,function(){return t.source})),lookbehind:!0},/\b[A-Z]\w*(?=\s*::\s*\w+\s*\()/,/\b[A-Z_]\w*(?=\s*::\s*~\w+\s*\()/i,{pattern:/\w+(?=\s*<(?:[^<>]|<(?:[^<>]|<[^<>]*>)*>)*>\s*::\s*\w+\s*\()/,inside:null}],keyword:t,number:{pattern:/(?:\b0b[01']+|\b0x(?:[\da-f']+\.?[\da-f']*|\.[\da-f']+)(?:p[+-]?[\d']+)?|(?:\b[\d']+\.?[\d']*|\B\.[\d']+)(?:e[+-]?[\d']+)?)[ful]*/i,greedy:!0},operator:/>>=?|<<=?|->|([-+&|:])\1|[?:~]|[-+*/%&|^!=<>]=?|\b(?:and|and_eq|bitand|bitor|not|not_eq|or|or_eq|xor|xor_eq)\b/,boolean:/\b(?:true|false)\b/}),e.languages.insertBefore("cpp","string",{"raw-string":{pattern:/R"([^()\\ ]{0,16})\([\s\S]*?\)\1"/,alias:"string",greedy:!0}}),e.languages.insertBefore("cpp","class-name",{"base-clause":{pattern:/(\b(?:class|struct)\s+\w+\s*:\s*)(?:[^;{}"'])+?(?=\s*[;{])/,lookbehind:!0,greedy:!0,inside:e.languages.extend("cpp",{})}}),e.languages.insertBefore("inside","operator",{"class-name":/\b[a-z_]\w*\b(?!\s*::)/i},e.languages.cpp["base-clause"])}(Prism); \ No newline at end of file diff --git a/tests/languages/cpp/base-clause_feature.test b/tests/languages/cpp/base-clause_feature.test new file mode 100644 index 0000000000..4d9247bbc4 --- /dev/null +++ 
b/tests/languages/cpp/base-clause_feature.test @@ -0,0 +1,117 @@ +struct Base {}; +struct Derived : Base {}; +struct Derived : private Base; +class X : public virtual B {}; +class Y : virtual public B {}; +class Y : virtual baz::B {}; +class Z : public B<foo::T>; +struct AA : X, Y, foo::bar::Z {}; + +class service : private Transport // comment +{}; + +---------------------------------------------------- + +[ + ["keyword", "struct"], + ["class-name", "Base"], + ["punctuation", "{"], + ["punctuation", "}"], + ["punctuation", ";"], + ["keyword", "struct"], + ["class-name", "Derived"], + ["operator", ":"], + ["base-clause", [ + ["class-name", "Base"] + ]], + ["punctuation", "{"], + ["punctuation", "}"], + ["punctuation", ";"], + ["keyword", "struct"], + ["class-name", "Derived"], + ["operator", ":"], + ["base-clause", [ + ["keyword", "private"], + ["class-name", "Base"] + ]], + ["punctuation", ";"], + ["keyword", "class"], + ["class-name", "X"], + ["operator", ":"], + ["base-clause", [ + ["keyword", "public"], + ["keyword", "virtual"], + ["class-name", "B"] + ]], + ["punctuation", "{"], + ["punctuation", "}"], + ["punctuation", ";"], + ["keyword", "class"], + ["class-name", "Y"], + ["operator", ":"], + ["base-clause", [ + ["keyword", "virtual"], + ["keyword", "public"], + ["class-name", "B"] + ]], + ["punctuation", "{"], + ["punctuation", "}"], + ["punctuation", ";"], + ["keyword", "class"], + ["class-name", "Y"], + ["operator", ":"], + ["base-clause", [ + ["keyword", "virtual"], + " baz", + ["operator", "::"], + ["class-name", "B"] + ]], + ["punctuation", "{"], + ["punctuation", "}"], + ["punctuation", ";"], + ["keyword", "class"], + ["class-name", "Z"], + ["operator", ":"], + ["base-clause", [ + ["keyword", "public"], + ["class-name", "B"], + ["operator", "<"], + "foo", + ["operator", "::"], + ["class-name", "T"], + ["operator", ">"] + ]], + ["punctuation", ";"], + ["keyword", "struct"], + ["class-name", "AA"], + ["operator", ":"], + ["base-clause", [ + ["class-name", "X"], 
+ ["punctuation", ","], + ["class-name", "Y"], + ["punctuation", ","], + " foo", + ["operator", "::"], + "bar", + ["operator", "::"], + ["class-name", "Z"] + ]], + ["punctuation", "{"], + ["punctuation", "}"], + ["punctuation", ";"], + ["keyword", "class"], + ["class-name", "service"], + ["operator", ":"], + ["base-clause", [ + ["keyword", "private"], + ["class-name", "Transport"], + ["comment", "// comment"] + ]], + ["punctuation", "{"], + ["punctuation", "}"], + ["punctuation", ";"] +] + +---------------------------------------------------- + +Checks for the base clauses of classes and structs. diff --git a/tests/languages/cpp/class-name_feature.test b/tests/languages/cpp/class-name_feature.test index 2f79a4cd9a..c3b7a01db7 100644 --- a/tests/languages/cpp/class-name_feature.test +++ b/tests/languages/cpp/class-name_feature.test @@ -3,6 +3,11 @@ class Foo_bar struct foo enum bar enum class FooBar +template <typename FooBar> + +void Foo::bar() {} +Foo::~Foo() {} +void Foo<int>::bar() {} ---------------------------------------------------- @@ -11,7 +16,39 @@ enum class FooBar ["keyword", "class"], ["class-name", "Foo_bar"], ["keyword", "struct"], ["class-name", "foo"], ["keyword", "enum"], ["class-name", "bar"], - ["keyword", "enum"], ["keyword", "class"], ["class-name", "FooBar"] + ["keyword", "enum"], ["keyword", "class"], ["class-name", "FooBar"], + ["keyword", "template"], ["operator", "<"], ["keyword", "typename"], ["class-name", "FooBar"], ["operator", ">"], + + + ["keyword", "void"], + ["class-name", "Foo"], + ["operator", "::"], + ["function", "bar"], + ["punctuation", "("], + ["punctuation", ")"], + ["punctuation", "{"], + ["punctuation", "}"], + + ["class-name", "Foo"], + ["operator", "::"], + ["operator", "~"], + ["function", "Foo"], + ["punctuation", "("], + ["punctuation", ")"], + ["punctuation", "{"], + ["punctuation", "}"], + + ["keyword", "void"], + ["class-name", "Foo"], + ["operator", "<"], + ["keyword", "int"], + ["operator", ">"], + ["operator", "::"], + ["function", 
"bar"], + ["punctuation", "("], + ["punctuation", ")"], + ["punctuation", "{"], + ["punctuation", "}"] ] ---------------------------------------------------- diff --git a/tests/languages/cpp/issue2347.test b/tests/languages/cpp/issue2347.test new file mode 100644 index 0000000000..9f302201b6 --- /dev/null +++ b/tests/languages/cpp/issue2347.test @@ -0,0 +1,72 @@ +class MainWindow : public QMainWindow +{ + Q_OBJECT + + private slots: + void changeWindowTitle(); +}; +void MainWindow::changeWindowTitle() +{ + setWindowTitle(plainTextEdit->currentFile.split("/").last() + " - Notepanda"); +} + +---------------------------------------------------- + +[ + ["keyword", "class"], + ["class-name", "MainWindow"], + ["operator", ":"], + ["base-clause", [ + ["keyword", "public"], + ["class-name", "QMainWindow"] + ]], + + ["punctuation", "{"], + + "\n Q_OBJECT\n\n ", + + ["keyword", "private"], + " slots", + ["operator", ":"], + + ["keyword", "void"], + ["function", "changeWindowTitle"], + ["punctuation", "("], + ["punctuation", ")"], + ["punctuation", ";"], + + ["punctuation", "}"], + ["punctuation", ";"], + + ["keyword", "void"], + ["class-name", "MainWindow"], + ["operator", "::"], + ["function", "changeWindowTitle"], + ["punctuation", "("], + ["punctuation", ")"], + + ["punctuation", "{"], + + ["function", "setWindowTitle"], + ["punctuation", "("], + "plainTextEdit", + ["operator", "->"], + "currentFile", + ["punctuation", "."], + ["function", "split"], + ["punctuation", "("], + ["string", "\"/\""], + ["punctuation", ")"], + ["punctuation", "."], + ["function", "last"], + ["punctuation", "("], + ["punctuation", ")"], + ["operator", "+"], + ["string", "\" - Notepanda\""], + ["punctuation", ")"], + ["punctuation", ";"], + + ["punctuation", "}"] +] + +----------------------------------------------------