Exporting all the files of a wiki can be done in a few different ways:
="*[[:"&A1&"]]"
use strict;
use warnings;
use LWP::Simple;
use LWP::UserAgent;
use HTTP::Request;
use HTTP::Response;

# FileNameExtract.pl
#
# Fetches one wiki page and prints, for every distinct file name found after a
# 'title="File:' marker, a line of Perl of the form
#     $myFileName[N]="<file name>";
# with N counting up from 0.  The output is meant to be pasted into a
# follow-up script.
#
# Each file name is emitted once, in order of first appearance.

my $url       = "http://libertarianwiki.org/User:Joe Schmoe/All_files_2";
my $agentName = "User:Tisane (http://www.mediawiki.org/wiki/User:Tisane) grabbing some data using FileNameExtract.pl";
my $delimiter = "\n";
my $string    = 'title="File:';    # text that immediately precedes a file name
my $endString = '"';               # text that terminates a file name

my $browser = LWP::UserAgent->new();
$browser->timeout(500);
$browser->agent($agentName);       # identify ourselves to the wiki

my $request  = HTTP::Request->new(GET => $url);
my $response = $browser->request($request);

# Do not try to parse an error page as if it were the file list.
if (!$response->is_success) {
    die sprintf("%s\n", $response->status_line);
}
my $contents = $response->content();

my %seen;               # file names already printed
my $fileNumber = 0;     # next index to emit
my $position   = 0;     # where the next search starts

while (1) {
    # index() returns -1 when the marker is absent; that must be tested
    # BEFORE the marker length is added, otherwise the sentinel is lost.
    my $start = index($contents, $string, $position);
    last if $start == -1;
    $start += length($string);

    my $end = index($contents, $endString, $start);
    last if $end == -1;    # unterminated attribute: nothing usable follows

    my $fileName = substr($contents, $start, $end - $start);
    $position = $end;

    next if $seen{$fileName}++;

    print '$myFileName[' . $fileNumber . ']="' . $fileName . '";' . $delimiter;
    $fileNumber++;
}
use strict;
use warnings;
use LWP::UserAgent;
use HTTP::Request;

# For each wiki file name in @exportFiles, fetches that file's description
# page and prints the text found between the first 'images/' marker and the
# next double quote, as a line of Perl:
#     $exportFiles[N] = '<text>';
# where N is the file's position in @exportFiles.

# Files to export from the Wiki.
my @exportFiles = (
    "01-gold-bar.jpg",
    "100px-Massachusetts state flag.png",
    "100px-New York state flag.png",
    "128px-Padlock-red.svg.png",
    # ...and so on...
);

# Configuration variables
my $string    = 'images/';    # text that immediately precedes the wanted value
my $endString = '"';          # text that terminates it
my $delimiter = "\n";
my $reject1   = 'LibertarianWiki.gif);';     # extracted values to ignore
my $reject2   = 'icons/fileicon-pdf.png';

# Initialize the browser
my $browser = LWP::UserAgent->new();
$browser->timeout(500);

# Iterate over indices, not elements: the index is needed both to look up the
# file name and to label the generated assignment.
for my $idx (0 .. $#exportFiles) {
    my $exportFile = $exportFiles[$idx];
    my $url        = "http://libertarianwiki.org/File:$exportFile";

    my $request  = HTTP::Request->new(GET => $url);
    my $response = $browser->request($request);
    if (!$response->is_success) {
        printf STDERR "%s\n", $response->status_line;
        next;    # an error page has nothing worth extracting
    }
    my $contents = $response->content();

    # index() returns -1 when the marker is absent; check that before adding
    # the marker length, otherwise the "not found" case is indistinguishable.
    my $markerAt = index($contents, $string, 0);
    next if $markerAt == -1;
    my $position = $markerAt + length($string);

    my $endPosition = index($contents, $endString, $position);
    next if $endPosition == -1;

    my $filename = substr($contents, $position, $endPosition - $position);
    next if $filename eq $reject1 || $filename eq $reject2;

    print qq{\$exportFiles[$idx] = '$filename';$delimiter};
}
This in turn will generate a list that you can load into yet another script, e.g.:
use strict;
use warnings;
use LWP::Simple;
use LWP::UserAgent;
use HTTP::Request;
use HTTP::Response;

# DownloadImages.pl
#
# Downloads every entry of @myFileName from the wiki's images directory and
# saves it in the current directory under the entry's final path component
# (e.g. "7/78/01-gold-bar.jpg" is saved as "01-gold-bar.jpg").  The URL and
# the local file name are printed for each download.  An existing local file
# of the same name is overwritten.

my @myFileName = ('');
$myFileName[0] = "7/78/01-gold-bar.jpg";
$myFileName[1] = "5/53/100px-New_York_state_flag.png";
$myFileName[2] = "8/81/128px-Padlock-red.svg.png";
# ... ... ...
$myFileName[349] = "a/a6/WilliamGodwin.jpg";
$myFileName[350] = "b/b1/Wirtland_Coat_of_Arms.png";
$myFileName[351] = "f/f5/Wirtland_crane.png";

my $agentName = "User:Tisane (http://www.mediawiki.org/wiki/User:Tisane) grabbing some data using DownloadImages.pl";
my $delimiter = "\n";

my $browser = LWP::UserAgent->new();
$browser->timeout(500);
$browser->agent($agentName);    # identify ourselves to the wiki

# Walk the whole array rather than a hard-coded upper bound.
for my $count (0 .. $#myFileName) {
    my $path = $myFileName[$count];
    next unless defined $path && length $path;    # skip unfilled slots

    my $url = "http://libertarianwiki.org/wiki/images/" . $path;

    my $request  = HTTP::Request->new(GET => $url);
    my $response = $browser->request($request);
    if (!$response->is_success) {
        # Report and move on; do not write an error page to disk as an image.
        printf STDERR "%s: %s\n", $url, $response->status_line;
        next;
    }
    my $contents = $response->content();

    # Keep only the part after the last '/', whatever the directory prefix
    # length; this also keeps the output inside the current directory.
    (my $newFileName = $path) =~ s{\A.*/}{}s;
    next unless length $newFileName;

    print $url . $delimiter;
    print $newFileName . $delimiter;

    # Lexical handle, explicit write mode, and checked calls.  binmode keeps
    # the image bytes untouched on platforms that translate line endings.
    open(my $out, '>', $newFileName)
        or die "Cannot open '$newFileName' for writing: $!\n";
    binmode($out);
    print {$out} $contents
        or die "Cannot write to '$newFileName': $!\n";
    close($out)
        or die "Cannot close '$newFileName': $!\n";
}
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4