A small Slim application for returning a list of pages in a MediaWiki category, ordered by Dale-Chall readability scores.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

131 lines
3.4 KiB

  1. <?php
  2. use DaveChild\TextStatistics as TS;
  3. use Slim\Http\Request;
  4. use Slim\Http\Response;
  5. /**
  6. * Home page with basic template and category input form.
  7. */
  8. $app->get( '/', function (Request $request, Response $response, array $args) {
  9. // Render index view
  10. return $this->renderer->render( $response, 'index.phtml', $args );
  11. } );
  12. /**
  13. * An endpoint for getting category data by handing a request off to a
  14. * MediaWiki server.
  15. */
  16. $app->get( '/category', function (Request $request, Response $response, array $args) {
  17. $category = $request->getQueryParam('cat');
  18. $type = $request->getQueryParam('type');
  19. $queryParams = [
  20. 'action' => 'query',
  21. 'format' => 'json',
  22. 'generator' => 'categorymembers',
  23. 'gcmtitle' => "Category:$category",
  24. 'gcmlimit' => '60',
  25. 'prop' => 'extracts',
  26. 'exintro' => '1',
  27. 'explaintext' => '1',
  28. ];
  29. $textStatistics = new TS\TextStatistics;
  30. $pageList = [];
  31. $error = null;
  32. $endpoint = $this->get( 'settings' )['mwEndpoint'];
  33. while ( true ) {
  34. // Since we can only retrieve 20 extracts at a time, we'll need to continue
  35. // through the full list:
  36. if ( isset( $excontinue ) ) {
  37. $queryParams['excontinue'] = $excontinue;
  38. }
  39. list( $categoryData, $apiStatus ) = apiRequest( $endpoint, $queryParams );
  40. if ( $apiStatus !== 200 ) {
  41. $error = 'MediaWiki API request failed.';
  42. break;
  43. }
  44. if ( !isset( $categoryData['query']['pages'] ) ) {
  45. $error = 'No pages found for category.';
  46. break;
  47. }
  48. // Assign readability scores to each extract and build a list:
  49. foreach ( $categoryData['query']['pages'] as $page ) {
  50. // Skip if no intro or no extract for page in this part of the resultset:
  51. if ( !isset( $page['extract'] ) || !strlen( $page['extract'] ) ) {
  52. continue;
  53. }
  54. $score = $textStatistics->daleChallReadabilityScore( $page['extract'] );
  55. $pageList[] = [ $page['title'], $page['extract'], $score ];
  56. }
  57. $excontinue = null;
  58. if ( isset( $categoryData['continue']['excontinue']) ) {
  59. $excontinue = $categoryData['continue']['excontinue'];
  60. } else {
  61. // No more extract results.
  62. break;
  63. }
  64. }
  65. // Initial sort by readability score:
  66. usort( $pageList, function ($pageA, $pageB) {
  67. return $pageA[2] <=> $pageB[2];
  68. } );
  69. // Either send back raw JSON or return the home page with data
  70. // the template can use to build a table:
  71. if ( $type === 'json' ) {
  72. return $response->withJson( [
  73. 'pageList' => $pageList,
  74. 'error' => $error
  75. ] );
  76. } else {
  77. $templateVars = [
  78. 'pageList' => $pageList,
  79. 'error' => $error,
  80. 'cat' => $category
  81. ];
  82. return $this->renderer->render(
  83. $response,
  84. 'index.phtml',
  85. $templateVars
  86. );
  87. }
  88. } );
  89. /**
  90. * Take a MediaWiki API endpoint and some query parameters, use cURL to
  91. * retrieve results.
  92. *
  93. * @param string $endpoint URL of API endpoint
  94. * @param array $queryParams collection of GET parameters
  95. * @return array containing json_decode()ed API response and HTTP status code
  96. */
  97. function apiRequest ( $endpoint, $queryParams ) {
  98. $queryString = http_build_query( $queryParams );
  99. $ch = curl_init( $endpoint . $queryString );
  100. curl_setopt( $ch, \CURLOPT_RETURNTRANSFER, 1 );
  101. curl_setopt( $ch, CURLOPT_USERAGENT, "MediaWikiCategoryReadability/0.0.1 (mediawiki@chaff.p1k3.com)" );
  102. $apiResponse = curl_exec( $ch );
  103. $apiStatus = curl_getinfo( $ch, CURLINFO_HTTP_CODE );
  104. curl_close( $ch );
  105. $categoryData = json_decode( $apiResponse, true );
  106. return [ $categoryData, $apiStatus ];
  107. }