Skip to content

Commit

Permalink
Add update script and update property map
Browse files Browse the repository at this point in the history
See #7983
  • Loading branch information
discordier committed May 24, 2022
1 parent e4efd03 commit ef90170
Show file tree
Hide file tree
Showing 4 changed files with 574 additions and 158 deletions.
145 changes: 145 additions & 0 deletions bin/update-property-map.php
@@ -0,0 +1,145 @@
#!/usr/bin/env php
<?php

declare(strict_types=1);

// Original Idea for the code in here came from:
// https://github.com/phan/phan/blob/93c1c2/src/Phan/Language/Internal/PropertyMap.php#L49
// We differ however:
// 1. we parse the XML and extract original class and property names instead of the normalized identifiers.
// 2. We ignore non-parsable files.
//
// What is currently still missing is proper parsing of <xi:include> directives.

// Turn every error PHP reports through the error handler (notices, warnings, ...)
// into an ErrorException carrying the original message, severity, file and line.
set_error_handler(
    static function ($severity, $message, $filename, $lineNumber, $context = null): void {
        throw new ErrorException($message, 0, $severity, $filename, $lineNumber);
    }
);

// Locate the local checkout of the PHP documentation sources. realpath()
// returns false when the directory does not exist.
$docDir = realpath(__DIR__ . '/../build/doc-en');

if (false === $docDir) {
    echo 'PHP doc not found!' . PHP_EOL;
    echo 'Please execute: git clone git@github.com:php/doc-en.git ' . dirname(__DIR__) . '/build/doc-en' . PHP_EOL;
    // Without the documentation there is nothing to scan. Stop here instead of
    // handing `false` to the directory iterator further down.
    exit(1);
}

// Collect the path name of every *.xml file below the documentation checkout.
$files = iterator_to_array(
    new RegexIterator(
        new RecursiveIteratorIterator(
            new RecursiveDirectoryIterator(
                $docDir,
                FilesystemIterator::CURRENT_AS_PATHNAME | FilesystemIterator::SKIP_DOTS
            ),
            RecursiveIteratorIterator::LEAVES_ONLY
        ),
        // The dot must be escaped: the former pattern '/.*.xml$/' also accepted
        // any path that merely ended in "xml" (e.g. "fooxml").
        '/\.xml$/'
    )
);

// Start from the manually maintained map. Entries defined there always win,
// because already known class/property pairs are skipped further down.
$classes = require_once dirname(__DIR__) . '/dictionaries/ManualPropertyMap.php';
// Collect libxml errors internally so they can be reported per file.
libxml_use_internal_errors(true);
foreach ($files as $file) {
    $contents = file_get_contents($file);
    // FIXME: find a way to ignore custom entities, for now we strip them.
    $contents = preg_replace('#&[a-zA-Z\d.\-_]+;#', '', $contents);
    $contents = preg_replace('#%[a-zA-Z\d.\-_]+;#', '', $contents);
    $contents = preg_replace('#<!ENTITY[^>]+>#', '', $contents);
    // Drop errors recorded while parsing earlier files so that a failure report
    // only lists the problems of the current file.
    libxml_clear_errors();
    try {
        $simple = new SimpleXMLElement($contents);
    } catch (Throwable $exception) {
        // FIXME: we ignore files with XML errors at the moment because the input XML is not always well-formed.
        // Examples are rpminfo/entities.functions.xml, wkhtmltox/wkhtmltox/bits/web.xml,
        // wkhtmltox/wkhtmltox/bits/load.xml
        echo sprintf(
            "%1\$s: Ignoring %2\$s: %3\$s\n%4\$s",
            $file,
            get_class($exception),
            $exception->getMessage(),
            implode("\n", array_map(static function (LibXMLError $error): string {
                return $error->message;
            }, libxml_get_errors()))
        );
        libxml_clear_errors();
        continue;
    }

    $simple->registerXPathNamespace('docbook', 'http://docbook.org/ns/docbook');
    // xpath() may return false/null instead of an array; treat that as "no match".
    foreach ($simple->xpath('//docbook:classsynopsis') ?: [] as $classSynopsis) {
        $classSynopsis->registerXPathNamespace('docbook', 'http://docbook.org/ns/docbook');
        $classNames = $classSynopsis->xpath('./docbook:ooclass/docbook:classname');
        if (!is_array($classNames) || !isset($classNames[0])) {
            // A synopsis without a class name cannot be mapped to anything. Skip it
            // instead of failing on the missing array offset.
            continue;
        }
        $class = strtolower((string) $classNames[0]);

        // Search relative to the current synopsis ('.//'). A leading '//' is evaluated
        // from the document root and would attribute every field of the file to each
        // class synopsis found in it.
        foreach ($classSynopsis->xpath('.//docbook:fieldsynopsis') ?: [] as $item) {
            assert($item instanceof SimpleXMLElement);
            $property = strtolower((string) $item->varname);
            if (isset($classes[$class][$property])) {
                continue;
            }

            // We do not want to handle constants... I guess?!
            // Inspect every modifier, "const" is not necessarily the first one.
            foreach ($item->modifier as $modifier) {
                if ('const' === (string) $modifier) {
                    continue 2;
                }
            }

            $typeNode = $item->type[0];
            if (null === $typeNode) {
                continue;
            }
            assert($typeNode instanceof SimpleXMLElement);

            $typeClass = $typeNode->attributes()->class;
            if (null !== $typeClass && 'union' === (string) $typeClass) {
                // A union is expressed as nested <type> elements; join them with '|'.
                $types = [];
                foreach ($typeNode as $subType) {
                    $types[] = (string) $subType;
                }
                $type = implode('|', $types);
            } else {
                // Plain types and any other composite kind: use the textual content, so
                // that the map only ever receives strings (never SimpleXMLElement objects).
                $type = (string) $typeNode;
            }

            if ('' === $type) {
                // Some properties are not properly defined - we ignore them then.
                continue;
            }

            $classes[$class][$property] = $type;
        }
    }
}

/**
 * Renders a (possibly nested) array as PHP short-array source code.
 *
 * Keys are ordered case-insensitively and always emitted as strings; nested
 * arrays are rendered recursively with one additional space of indentation,
 * every other value is rendered via var_export().
 *
 * @param array  $array  The data to render.
 * @param string $prefix Indentation placed before the closing bracket; entries get this prefix plus one space.
 *
 * @return string The array literal, without a trailing newline or semicolon.
 */
function serializeArray(array $array, string $prefix): string
{
    uksort($array, static function (string $left, string $right): int {
        return strtolower($left) <=> strtolower($right);
    });

    $indent = $prefix . ' ';
    $lines = [];
    foreach ($array as $name => $entry) {
        $rendered = is_array($entry)
            ? serializeArray($entry, $indent)
            : var_export($entry, true);
        $lines[] = $indent . var_export((string) $name, true) . ' => ' . $rendered . ",\n";
    }

    return "[\n" . implode('', $lines) . $prefix . ']';
}

// Render the collected map and write it out as the generated dictionary file.
$targetFile = dirname(__DIR__) . '/dictionaries/PropertyMap.php';
$serialized = serializeArray($classes, '');
$generated = <<<EOF
<?php
namespace Psalm\Internal;
/**
* Automatically created by bin/update-property-map.php
*
* Please do not modify - adapt the override constants in above file instead.
*/
return $serialized;
EOF;
file_put_contents($targetFile, $generated);
145 changes: 145 additions & 0 deletions dictionaries/ManualPropertyMap.php
@@ -0,0 +1,145 @@
<?php
namespace Psalm\Internal;

/**
 * This file holds manually defined property maps for classes whose properties
 * are missing from, or incorrectly described in, the official PHP docs and
 * which therefore cannot be generated automatically by
 * bin/update-property-map.php.
 *
 * If you change this file, please run bin/update-property-map.php to keep
 * PropertyMap.php in sync.
 */

// Map of lowercased fully qualified class name => [lowercased property name => type].
return [
    //
    // Incorrectly documented classes from here on.
    // Revise these against the current state of the docs from time to time.
    //
    'dateinterval' => [
        // documented as 'mixed' in doc-en/reference/datetime/dateinterval.xml:90.
        'days' => 'false|int',
    ],
    'domnode' => [
        // documented as 'DomNodeList' in doc-en/reference/dom/domnode.xml:57.
        'childnodes' => 'DomNodeList<DomNode>',
    ],
    'tidy' => [
        // documented via <xi:include> in doc-en/reference/tidy/tidy.xml:33
        'errorbuffer' => 'string',
    ],
    //
    // Undocumented classes from here on.
    //
    'phpparser\\node\\expr\\array_' => [
        'items' => 'array<int, PhpParser\\Node\\Expr\\ArrayItem|null>',
    ],
    'phpparser\\node\\expr\\arrowfunction' => [
        'params' => 'list<PhpParser\\Node\\Param>',
    ],
    'phpparser\\node\\expr\\closure' => [
        'params' => 'list<PhpParser\\Node\\Param>',
    ],
    'phpparser\\node\\expr\\list_' => [
        'items' => 'array<int, PhpParser\\Node\\Expr\\ArrayItem|null>',
    ],
    'phpparser\\node\\expr\\shellexec' => [
        'parts' => 'list<PhpParser\\Node>',
    ],
    'phpparser\\node\\matcharm' => [
        'conds' => 'null|non-empty-list<PhpParser\\Node\\Expr>',
    ],
    'phpparser\\node\\name' => [
        'parts' => 'non-empty-list<non-empty-string>',
    ],
    'phpparser\\node\\stmt\\case_' => [
        'stmts' => 'list<PhpParser\\Node\\Stmt>',
    ],
    'phpparser\\node\\stmt\\catch_' => [
        'stmts' => 'list<PhpParser\\Node\\Stmt>',
    ],
    'phpparser\\node\\stmt\\class_' => [
        'stmts' => 'list<PhpParser\\Node\\Stmt>',
    ],
    'phpparser\\node\\stmt\\do_' => [
        'stmts' => 'list<PhpParser\\Node\\Stmt>',
    ],
    'phpparser\\node\\stmt\\else_' => [
        'stmts' => 'list<PhpParser\\Node\\Stmt>',
    ],
    'phpparser\\node\\stmt\\elseif_' => [
        'stmts' => 'list<PhpParser\\Node\\Stmt>',
    ],
    'phpparser\\node\\stmt\\finally_' => [
        'stmts' => 'list<PhpParser\\Node\\Stmt>',
    ],
    'phpparser\\node\\stmt\\for_' => [
        'stmts' => 'list<PhpParser\\Node\\Stmt>',
    ],
    'phpparser\\node\\stmt\\foreach_' => [
        'stmts' => 'list<PhpParser\\Node\\Stmt>',
    ],
    'phpparser\\node\\stmt\\if_' => [
        'stmts' => 'list<PhpParser\\Node\\Stmt>',
    ],
    'phpparser\\node\\stmt\\interface_' => [
        'stmts' => 'list<PhpParser\\Node\\Stmt>',
    ],
    'phpparser\\node\\stmt\\namespace_' => [
        'stmts' => 'list<PhpParser\\Node\\Stmt>',
    ],
    'phpparser\\node\\stmt\\trait_' => [
        'stmts' => 'list<PhpParser\\Node\\Stmt>',
    ],
    'phpparser\\node\\stmt\\trycatch' => [
        'stmts' => 'list<PhpParser\\Node\\Stmt>',
    ],
    'phpparser\\node\\stmt\\while_' => [
        'stmts' => 'list<PhpParser\\Node\\Stmt>',
    ],
    'rdkafka\\message' => [
        'err' => 'int',
        'headers' => 'array<string, string>|null',
        'key' => 'string|null',
        'offset' => 'int',
        'partition' => 'int',
        'payload' => 'string',
        'timestamp' => 'int',
        'topic_name' => 'string',
    ],

    //
    // Legacy extensions that got removed.
    //
    'mongoclient' => [
        'connected' => 'boolean',
        'status' => 'string',
    ],
    'mongocollection' => [
        'db' => 'MongoDB',
        'w' => 'integer',
        'wtimeout' => 'integer',
    ],
    'mongocursor' => [
        'slaveokay' => 'boolean',
        'timeout' => 'integer',
    ],
    'mongodb' => [
        'w' => 'integer',
        'wtimeout' => 'integer',
    ],
    // NOTE(review): this entry used to read
    // 'mongodb-driver-exception-writeexception' => 'MongoDBDriverWriteResult', i.e. the
    // namespace separators were mangled, so it could never match a lowercased class name
    // the way the other namespaced keys in this file do. Presumably it refers to
    // MongoDB\Driver\Exception\WriteException from the current mongodb driver rather than
    // a removed extension - confirm and move it to the matching section if so.
    'mongodb\\driver\\exception\\writeexception' => [
        'writeresult' => 'MongoDB\\Driver\\WriteResult',
    ],
    'mongoid' => [
        'id' => 'string',
    ],
    'mongoint32' => [
        'value' => 'string',
    ],
    'mongoint64' => [
        'value' => 'string',
    ],
    'tokyotyrantexception' => [
        'code' => 'int',
    ],
];

0 comments on commit ef90170

Please sign in to comment.