LinkTitles/LinkTitles_8body_8php_source.html

 <?php

 /*

  *      Copyright 2012-2014 Daniel Kraus <krada@gmx.net> ('bovender')

  *

  *      This program is free software; you can redistribute it and/or modify

  *      it under the terms of the GNU General Public License as published by

  *      the Free Software Foundation; either version 2 of the License, or

  *      (at your option) any later version.

  *

  *      This program is distributed in the hope that it will be useful,

  *      but WITHOUT ANY WARRANTY; without even the implied warranty of

  *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

  *      GNU General Public License for more details.

  *

  *      You should have received a copy of the GNU General Public License

  *      along with this program; if not, write to the Free Software

  *      Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,

  *      MA 02110-1301, USA.

  */


   /**
    * Helper for development and debugging: renders any value with print_r()
    * and writes the result, followed by a newline, to the stderr stream.
    *
    * @param mixed $var Value to dump.
    */
   function dump($var) {
       // Second argument makes print_r() return the text instead of echoing it.
       $rendered = print_r($var, true);

       // Message type 3 sends the message to the given destination.
       error_log($rendered . "\n", 3, 'php://stderr');
   }


   /**
    * Central class of the extension: turns occurrences of existing page
    * titles in wiki text into [[wiki links]].
    *
    * The class is purely static. While a page is being parsed, the current
    * page and the link target under consideration are kept in static
    * properties so that the preg_replace_callback() callbacks can see them.
    */
   class LinkTitles {
     /// Title object of the page that is currently being parsed.
     private static $currentTitle;

     /// Title object of the link target currently under consideration.
     private static $targetTitle;

     /// Content of the current target page; null until it has been loaded
     /// by getTargetContent(), and reset to null by newTarget().
     private static $targetContent;

     /// Text form (as returned by Title::getText()) of the current target.
     private static $targetTitleText;

     /**
      * Registers the extension's hook handlers in $wgHooks.
      *
      * The save and render handlers are only registered when the respective
      * configuration switch is on; the magic-word handler is always
      * registered.
      */
     public static function setup() {
       global $wgLinkTitlesParseOnEdit;
       global $wgLinkTitlesParseOnRender;
       global $wgHooks;

       if ( $wgLinkTitlesParseOnEdit ) {
         $wgHooks['PageContentSave'][] = 'LinkTitles::onPageContentSave';
       }
       if ( $wgLinkTitlesParseOnRender ) {
         $wgHooks['InternalParseBeforeLinks'][] = 'LinkTitles::onInternalParseBeforeLinks';
       }
       $wgHooks['GetDoubleUnderscoreIDs'][] = 'LinkTitles::onGetDoubleUnderscoreIDs';
     }

     /**
      * Handler for the PageContentSave hook: adds links to the content that
      * is about to be saved. Minor edits are left untouched.
      *
      * $content is replaced (by reference) only if parsing changed the text.
      *
      * @return bool Always true, so that hook processing continues.
      */
     public static function onPageContentSave( &$wikiPage, &$user, &$content, &$summary,
         $isMinor, $isWatch, $section, &$flags, &$status ) {

       if ( $isMinor ) {
         return true;
       }

       $title = $wikiPage->getTitle();
       $handler = $content->getContentHandler();
       $text = $handler->serializeContent( $content );
       $newText = self::parseContent( $title, $text );

       // Strict comparison: a loose '!=' treats numeric-looking strings
       // such as '1e3' and '1000' as equal.
       if ( $newText !== $text ) {
         $content = $handler->unserializeContent( $newText );
       }
       return true;
     }

     /**
      * Handler for the InternalParseBeforeLinks hook: adds links to the text
      * while the page is being rendered.
      *
      * @param Parser $parser Parser that is rendering the page.
      * @param string $text Wiki text; modified in place.
      * @return bool Always true, so that hook processing continues.
      */
     public static function onInternalParseBeforeLinks( Parser &$parser, &$text ) {
       $title = $parser->getTitle();
       $text = self::parseContent( $title, $text );
       return true;
     }

     /**
      * Core of the extension: returns $text with occurrences of existing
      * page titles (main namespace only) converted to wiki links.
      *
      * @param Title $title Title of the page the text belongs to; this page
      *   is excluded from the list of link targets.
      * @param string $text Wiki text to process. It is not modified.
      * @return string The processed text, or $text unchanged if it contains
      *   the MAG_LINKTITLES_NOAUTOLINKS magic word.
      */
     private static function parseContent( Title &$title, &$text ) {
       // If the page contains the magic word '__NOAUTOLINKS__', do not parse it.
       if ( MagicWord::get('MAG_LINKTITLES_NOAUTOLINKS')->match( $text ) ) {
         return $text;
       }

       // Configuration variables need to be defined here as globals.
       global $wgLinkTitlesPreferShortTitles;
       global $wgLinkTitlesMinimumTitleLength;
       global $wgLinkTitlesParseHeadings;
       global $wgLinkTitlesBlackList;
       global $wgLinkTitlesSkipTemplates;
       global $wgLinkTitlesFirstOnly;
       global $wgLinkTitlesWordStartOnly;
       global $wgLinkTitlesWordEndOnly;
       global $wgLinkTitlesSmartMode;
       global $wgCapitalLinks;

       // Use unicode character properties rather than \b escape sequences
       // to detect whole words containing non-ASCII characters as well.
       // Note that this requires the use of the '/u' switch, and you need
       // to have PHP with a PCRE library that was compiled with
       // --enable-unicode-properties
       $wordStartDelim = $wgLinkTitlesWordStartOnly ? '(?<!\pL)' : '';
       $wordEndDelim = $wgLinkTitlesWordEndOnly ? '(?!\pL)' : '';

       $sortOrder = $wgLinkTitlesPreferShortTitles ? 'ASC' : 'DESC';

       // A limit of -1 means "no limit" for preg_replace_callback().
       $limit = $wgLinkTitlesFirstOnly ? 1 : -1;

       if ( $wgLinkTitlesSkipTemplates ) {
         $templatesDelimiter = '{{[^}]+}}|';
       } else {
         // Match template names (ignoring any piped [[]] links in them)
         // along with the trailing pipe and parameter name or closing
         // braces; also match sequences of '|wordcharacters=' (without
         // spaces in them) that usually only occur as parameter names in
         // transclusions (but could also occur as wiki table cell contents).
         // TODO: Find a way to match parameter names in transclusions, but
         // not in table cells or other sequences involving a pipe character
         // and equal sign.
         $templatesDelimiter = '{{[^|]*?(?:(?:\[\[[^]]+]])?)[^|]*?(?:\|(?:\w+=)?|(?:}}))|\|\w+=|';
       }

       self::$currentTitle = $title;
       $newText = $text;

       // Build a regular expression that will capture existing wiki links ("[[...]]"),
       // wiki headings ("= ... =", "== ... ==" etc.),
       // urls ("http://example.com", "[http://example.com]", "[http://example.com Description]"),
       // and email addresses ("mail@example.com").
       // Since there is a user option to skip headings, we make this part of the expression
       // optional. Note that in order to use preg_split(), it is important to have only one
       // capturing subpattern (which precludes the use of conditional subpatterns).
       $headingsDelimiter = $wgLinkTitlesParseHeadings ? '' : '=+.+?=+|';
       $urlPattern = '[a-z]+?\:\/\/(?:\S+\.)+\S+(?:\/.*)?';
       $delimiter = '/(' .                           // exclude from linking:
         '\[\[.*?\]\]|' .                            // links
         $headingsDelimiter .                        // headings (unless they are to be parsed)
         $templatesDelimiter .                       // templates (entirely, or just their names)
         '^ .+?\n|\n .+?\n|\n .+?$|^ .+?$|' .        // preformatted text
         '<nowiki>.*?<.nowiki>|<code>.*?<\/code>|' . // nowiki/code
         '<pre>.*?<\/pre>|<html>.*?<\/html>|' .      // pre/html
         '<script>.*?<\/script>|' .                  // script
         '<div.+?>|<\/div>|' .                       // attributes of div elements
         '<span.+?>|<\/span>|' .                     // attributes of span elements
         '<file>[^<]*<\/file>|' .                    // contents of file elements
         'style=".+?"|class=".+?"|' .                // styles and classes (e.g. of wikitables)
         '\[' . $urlPattern . '\s.+?\]|'. $urlPattern .  '(?=\s|$)|' . // urls
         '(?<=\b)\S+\@(?:\S+\.)+\S+(?=\b)' .        // email addresses
         ')/ism';

       // Build the list of pages that are not supposed to be link targets.
       // This includes the current page. The page table stores titles in
       // DB-key form (underscores instead of spaces), so blacklist entries
       // are normalized to that form before they are compared.
       $excludedKeys = array();
       foreach ( (array) $wgLinkTitlesBlackList as $entry ) {
         $excludedKeys[] = str_replace( ' ', '_', $entry );
       }
       $excludedKeys[] = self::$currentTitle->getDbKey();

       $res = self::fetchCandidateTitles(
         (int) $wgLinkTitlesMinimumTitleLength, $excludedKeys, $sortOrder );

       // Part of the search pattern that is the same for every title.
       $patternPrefix = '/(?<![\:\.\@\/\?\&])' . $wordStartDelim;

       // Iterate through the page titles
       foreach ( $res as $row ) {
         self::newTarget( $row->page_title );
         self::$targetTitleText = self::$targetTitle->getText();

         // First pass: case-sensitive search (except for the first letter
         // if $wgCapitalLinks is on), producing plain links.
         $simplePattern = $patternPrefix .
           self::buildSearchTerm( self::$targetTitleText, $wgCapitalLinks ) .
           $wordEndDelim . '/u';
         $newText = self::linkOccurrences(
           $newText, $delimiter, $simplePattern, 'simpleModeCallback', $limit );

         // If smart mode is turned on, the extension will perform a second
         // pass on the page and add links with aliases where the case does
         // not match.
         if ( $wgLinkTitlesSmartMode ) {
           $smartPattern = $patternPrefix .
             '(' . preg_quote( self::$targetTitleText, '/' ) . ')' .
             $wordEndDelim . '/iu';
           $newText = self::linkOccurrences(
             $newText, $delimiter, $smartPattern, 'smartModeCallback', $limit );
         }
       }
       return $newText;
     }

     /**
      * Fetches the titles of all main-namespace pages that may serve as link
      * targets, ordered by title length.
      *
      * Since the db may be sqlite, which does not support the CHAR_LENGTH
      * function, the query is retried with LENGTH if the first attempt
      * throws an exception.
      *
      * @param int $minimumLength Minimum length of a title.
      * @param array $excludedKeys Titles (DB-key form) that must be skipped.
      * @param string $sortOrder Either 'ASC' or 'DESC'.
      * @return mixed Result of the database select; rows carry page_title.
      */
     private static function fetchCandidateTitles( $minimumLength, array $excludedKeys, $sortOrder ) {
       $dbr = wfGetDB( DB_SLAVE );

       // Let the database layer quote the values instead of wrapping them
       // in hand-written quotes, so that titles containing quote characters
       // cannot break (or inject into) the query.
       $exclusion = 'page_title NOT IN (' . $dbr->makeList( $excludedKeys ) . ')';

       try {
         return self::selectTitles( $dbr, 'CHAR_LENGTH', $minimumLength, $exclusion, $sortOrder );
       } catch ( Exception $e ) {
         return self::selectTitles( $dbr, 'LENGTH', $minimumLength, $exclusion, $sortOrder );
       }
     }

     /**
      * Runs the title query using the given SQL length function.
      *
      * @param object $dbr Database connection as returned by wfGetDB().
      * @param string $lengthFunction 'CHAR_LENGTH' or 'LENGTH'.
      * @param int $minimumLength Minimum length of a title.
      * @param string $exclusion Ready-made 'page_title NOT IN (...)' condition.
      * @param string $sortOrder Either 'ASC' or 'DESC'.
      * @return mixed Result of the database select.
      */
     private static function selectTitles( $dbr, $lengthFunction, $minimumLength, $exclusion, $sortOrder ) {
       return $dbr->select(
         'page',
         'page_title',
         array(
           'page_namespace = 0',
           $lengthFunction . '(page_title) >= ' . (int) $minimumLength,
           $exclusion,
         ),
         __METHOD__,
         array( 'ORDER BY' => $lengthFunction . '(page_title) ' . $sortOrder )
       );
     }

     /**
      * Builds the capturing sub-pattern that matches a page title.
      *
      * Depending on the global configuration setting $wgCapitalLinks, the
      * title has to be searched for either in a strictly case-sensitive
      * way, or in a 'fuzzy' way where the first letter of the title may be
      * either case.
      *
      * @param string $titleText Title text to search for.
      * @param bool $capitalLinks Value of $wgCapitalLinks.
      * @return string Parenthesized pattern fragment (without delimiters).
      */
     private static function buildSearchTerm( $titleText, $capitalLinks ) {
       // Escape special characters in the page title to prevent regexp
       // compilation errors.
       $quotedTitle = preg_quote( $titleText, '/' );

       // If the first character had to be escaped it is not a letter, so
       // there is nothing to make case-insensitive.
       if ( $capitalLinks && $quotedTitle !== '' && $quotedTitle[0] !== '\\' ) {
         // Take the first *character*, not the first byte: splitting a
         // multibyte character would yield an invalid UTF-8 pattern, which
         // makes matching with the '/u' switch fail.
         $firstChar = mb_substr( $quotedTitle, 0, 1, 'UTF-8' );
         return '((?i)' . $firstChar . '(?-i)' .
           substr( $quotedTitle, strlen( $firstChar ) ) . ')';
       }
       return '(' . $quotedTitle . ')';
     }

     /**
      * Splits $text into linkable and protected segments and applies the
      * given callback to matches of $pattern in the linkable segments.
      *
      * credits to inhan @ StackOverflow for suggesting preg_split
      * see http://stackoverflow.com/questions/10672286
      *
      * @param string $text Text to process.
      * @param string $delimiter Pattern (one capturing group) that matches
      *   the segments which must not be touched.
      * @param string $pattern Complete search pattern for the title.
      * @param string $callbackName Name of the static callback method.
      * @param int $limit Maximum number of replacements, or -1 for no limit.
      * @return string The text with links added.
      */
     private static function linkOccurrences( $text, $delimiter, $pattern, $callbackName, $limit ) {
       $segments = preg_split( $delimiter, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
       if ( $segments === false ) {
         // The split failed; leave the text as it is rather than lose it.
         return $text;
       }

       // Even indexes point to text between the captured delimiters; odd
       // indexes hold the delimiters themselves and are skipped.
       $segmentCount = count( $segments );
       for ( $i = 0; $i < $segmentCount; $i += 2 ) {
         $count = 0;
         $replaced = preg_replace_callback( $pattern,
           array( 'LinkTitles', $callbackName ), $segments[$i], $limit, $count );
         if ( $replaced === null ) {
           // A regex error must not wipe out the segment.
           continue;
         }
         $segments[$i] = $replaced;

         // With a limit in effect, stop after the first segment in which
         // the title was found.
         if ( ( $limit >= 0 ) && ( $count > 0 ) ) {
           break;
         }
       }
       return implode( '', $segments );
     }

     /**
      * Processes a single page and saves the result as a minor edit if
      * links were added.
      *
      * @param string $title Title text of a page in the main namespace.
      * @param RequestContext $context Context that provides the user the
      *   edit is attributed to.
      */
     public static function processPage($title, RequestContext $context) {
       // TODO: make this namespace-aware
       $titleObj = Title::makeTitle(0, $title);
       $page = WikiPage::factory($titleObj);
       $content = $page->getContent();
       if ( $content === null ) {
         // No content could be obtained for this page; nothing to do.
         return;
       }

       $handler = $content->getContentHandler();
       $text = $handler->serializeContent( $content );
       $newText = self::parseContent( $titleObj, $text );
       if ( $text !== $newText ) {
         $content = $handler->unserializeContent( $newText );
         $page->doQuickEditContent($content,
           $context->getUser(),
           "Links to existing pages added by LinkTitles bot.",
           true // minor modification
         );
       }
     }

     /**
      * Handler for the GetDoubleUnderscoreIDs hook: appends the IDs of the
      * two magic words defined by this extension to the list.
      *
      * @param array $doubleUnderscoreIDs List of magic word IDs; extended
      *   in place.
      * @return bool Always true, so that hook processing continues.
      */
     public static function onGetDoubleUnderscoreIDs( array &$doubleUnderscoreIDs ) {
       $doubleUnderscoreIDs[] = 'MAG_LINKTITLES_NOTARGET';
       $doubleUnderscoreIDs[] = 'MAG_LINKTITLES_NOAUTOLINKS';
       return true;
     }

     /**
      * Callback for preg_replace_callback() in simple mode: wraps the match
      * in link brackets if the target page may be linked to.
      *
      * @param array $matches Match data; $matches[0] is the matched text.
      * @return string Replacement text.
      */
     private static function simpleModeCallback( array $matches ) {
       if ( self::checkTargetPage() ) {
         return '[[' . $matches[0] . ']]';
       }
       return $matches[0];
     }

     /**
      * Callback for preg_replace_callback() in smart mode.
      *
      * This essentially performs a case-sensitive comparison of the target
      * page title and the occurrence found on the page; if the cases do
      * not match, it builds an aliased (piped) link. If $wgCapitalLinks is
      * set to true, the case of the first letter is ignored by MediaWiki
      * and we don't need to build a piped link if only the case of the
      * first letter is different.
      *
      * @param array $matches Match data; $matches[0] is the matched text.
      * @return string Replacement text.
      */
     private static function smartModeCallback( array $matches ) {
       global $wgCapitalLinks;

       $found = $matches[0];
       if ( !self::checkTargetPage() ) {
         return $found;
       }

       $target = self::$targetTitleText;
       if ( $wgCapitalLinks ) {
         // Ignore the first letter of both strings, as it does not matter
         // for linking.
         $sameCase = self::dropFirstCharacter( $target ) === self::dropFirstCharacter( $found );
       } else {
         $sameCase = ( $target === $found );
       }

       if ( $sameCase ) {
         // Case-sensitive match: no need to build piped link.
         return '[[' . $found . ']]';
       }
       // Case-insensitive match: build piped link.
       return '[[' . $target . '|' . $found . ']]';
     }

     /**
      * Returns $text without its first character. The first character is
      * determined as UTF-8, so a multibyte letter is removed as a whole.
      *
      * @param string $text
      * @return string
      */
     private static function dropFirstCharacter( $text ) {
       $text = (string) $text;
       if ( $text === '' ) {
         return '';
       }
       $firstChar = mb_substr( $text, 0, 1, 'UTF-8' );
       return (string) substr( $text, strlen( $firstChar ) );
     }

     /**
      * Makes the page with the given title the current link target and
      * discards the cached content of the previous target.
      *
      * @param string $title Page title as stored in the page table.
      */
     private static function newTarget( $title ) {
       // @todo Make this wiki namespace aware.
       self::$targetTitle = Title::makeTitle( NS_MAIN, $title );
       self::$targetContent = null;
     }

     /**
      * Returns the content of the current target page, loading it on first
      * use and keeping it until newTarget() selects another target.
      *
      * @return mixed Whatever WikiPage::getContent() returns for the target.
      */
     private static function getTargetContent() {
       // Test the static property; testing a local variable of the same
       // name would always fail and defeat the cache.
       if ( self::$targetContent === null ) {
         self::$targetContent = WikiPage::factory( self::$targetTitle )->getContent();
       }
       return self::$targetContent;
     }

     /**
      * Decides whether the current target page may be linked to.
      *
      * @return bool False if redirect checking is enabled and the target
      *   redirects to the current page, or if the no-target magic word is
      *   enabled and present on the target page; true otherwise.
      */
     private static function checkTargetPage() {
       global $wgLinkTitlesEnableNoTargetMagicWord;
       global $wgLinkTitlesCheckRedirect;

       // Without any check enabled there is no need to load the target page.
       if ( !$wgLinkTitlesCheckRedirect && !$wgLinkTitlesEnableNoTargetMagicWord ) {
         return true;
       }

       $targetContent = self::getTargetContent();
       if ( $targetContent === null ) {
         // No content available, so neither check can apply.
         return true;
       }

       // If checking for redirects is enabled and the target page does
       // indeed redirect to the current page, return the page title as-is
       // (unlinked).
       if ( $wgLinkTitlesCheckRedirect ) {
         $redirectTitle = $targetContent->getUltimateRedirectTarget();
         if ( $redirectTitle && $redirectTitle->equals( self::$currentTitle ) ) {
           return false;
         }
       }

       // If the magic word __NOAUTOLINKTARGET__ is enabled and the target
       // page does indeed contain this magic word, return the page title
       // as-is (unlinked).
       if ( $wgLinkTitlesEnableNoTargetMagicWord ) {
         if ( $targetContent->matchMagicWord(
             MagicWord::get('MAG_LINKTITLES_NOTARGET') ) ) {
           return false;
         }
       }
       return true;
     }
   }


 // vim: ts=2:sw=2:noet:comments^=\:///

LinkTitles
Central class of the extension.
Definition: LinkTitles.body.php:31

dump
dump($var)
Helper function for development and debugging.
Definition: LinkTitles.body.php:25

$wgLinkTitlesBlackList
$wgLinkTitlesBlackList
Blacklist of page titles that should never be linked.
Definition: LinkTitles.php:100

$wgLinkTitlesWordEndOnly
$wgLinkTitlesWordEndOnly
Determines whether a page title must end with the end of a word in order for it to be linked...
Definition: LinkTitles.php:138

$wgLinkTitlesFirstOnly
$wgLinkTitlesFirstOnly
Determines whether to link only the first occurrence of a page title on a page or all occurrences...
Definition: LinkTitles.php:106

$wgLinkTitlesEnableNoTargetMagicWord
$wgLinkTitlesEnableNoTargetMagicWord
Determines whether the magic word __NOAUTOLINKTARGET__ is enabled.
Definition: LinkTitles.php:182

$wgLinkTitlesSkipTemplates
$wgLinkTitlesSkipTemplates
Determines whether to parse text inside templates.
Definition: LinkTitles.php:94

$wgLinkTitlesParseHeadings
$wgLinkTitlesParseHeadings
Determines whether or not to insert links into headings.
Definition: LinkTitles.php:60

$wgLinkTitlesPreferShortTitles
$wgLinkTitlesPreferShortTitles
Controls precedence of page titles.
Definition: LinkTitles.php:51

LinkTitles\onPageContentSave
static onPageContentSave(&$wikiPage, &$user, &$content, &$summary, $isMinor, $isWatch, $section, &$flags, &$status)
Event handler that is hooked to the PageContentSave event.
Definition: LinkTitles.body.php:62

$wgLinkTitlesWordStartOnly
$wgLinkTitlesWordStartOnly
Determines whether a page title must occur at the start of a word in order for it to be linked...
Definition: LinkTitles.php:120

$wgLinkTitlesMinimumTitleLength
$wgLinkTitlesMinimumTitleLength
The minimum number of characters in a title that is required for it to be automatically linked to...
Definition: LinkTitles.php:56

LinkTitles\onGetDoubleUnderscoreIDs
static onGetDoubleUnderscoreIDs(array &$doubleUnderscoreIDs)
Adds the two magic words defined by this extension to the list of 'double-underscore' terms that are ...
Definition: LinkTitles.body.php:286

$wgLinkTitlesParseOnRender
$wgLinkTitlesParseOnRender
Important configuration variable that determines when the extension will process a page...
Definition: LinkTitles.php:84

$wgLinkTitlesSmartMode
$wgLinkTitlesSmartMode
Important setting that controls the extension's smart mode of operation.
Definition: LinkTitles.php:153

LinkTitles\onInternalParseBeforeLinks
static onInternalParseBeforeLinks(Parser &$parser, &$text)
Event handler that is hooked to the InternalParseBeforeLinks event.
Definition: LinkTitles.body.php:79

$wgLinkTitlesParseOnEdit
$wgLinkTitlesParseOnEdit
Important configuration variable that determines when the extension will process a page...
Definition: LinkTitles.php:71

LinkTitles\setup
static setup()
Setup function, hooks the extension's functions to MediaWiki events.
Definition: LinkTitles.body.php:48

LinkTitles\processPage
static processPage($title, RequestContext $context)
Automatically processes a single page, given its title text in $title (the page is looked up in the main namespace).
Definition: LinkTitles.body.php:264

$wgLinkTitlesCheckRedirect
$wgLinkTitlesCheckRedirect
Determines whether or not to check if a page redirects to the current page.
Definition: LinkTitles.php:167