bootstrap version of site
[Bans.Verniy.xyz-Modern.git] / CronJobs / ripper.php
blob56fab019c7c7274c2c799d02c6571a2ddb5d99a3
1 <?php
/**
 * Retrieve the body of a URL with cURL, following redirects.
 *
 * The request is sent with a fixed Firefox user-agent string and is bounded
 * by connect/transfer timeouts so a stalled server cannot hang the job.
 *
 * @param string $url Address to request.
 * @return string|false Response body, or false when the cURL handle cannot
 *                      be created or the transfer fails.
 */
function curl_get_contents($url)
{
    $ch = curl_init();
    if ($ch === false) {
        return false;
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 5.1; rv:12.0) Gecko/20100101 Firefox/12.0');
    // Return the body from curl_exec() instead of printing it.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    // Without these limits a dead connection would block forever.
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($ch, CURLOPT_TIMEOUT, 60);

    $data = curl_exec($ch);
    curl_close($ch);

    return $data;
}
/**
 * Download a page and parse it into a DOMDocument.
 *
 * @param string $url Address of the HTML page.
 * @return DOMDocument Parsed document; an empty document when the download
 *                     failed or returned no content.
 */
function file_get_html($url)
{
    // Keep libxml parse errors in its internal buffer instead of emitting warnings.
    libxml_use_internal_errors(true);
    $doc = new DOMDocument();

    $html = curl_get_contents($url);
    // loadHTML() rejects an empty source (ValueError on PHP 8), so only
    // parse when the download actually produced markup.
    if (is_string($html) && $html !== '') {
        $doc->loadHTML($html);
    }

    return $doc;
}
////////////////

// Site to be ripped
$dom = file_get_html("https://www.4chan.org/bans");

// Table holding the ban entries
$table = $dom->getElementById('log-entries');
if ($table === null) {
    // Download failed or the page layout changed; stop before touching any log files.
    die("could not find the log-entries table on the bans page");
}

// Table lookup state: $i counts every cell seen, $j counts the rows collected
$i = 0;
$j = 0;
$rowContents = array();
$rowDivisions = array();

// The second <script> element carries "var postPreviews = {...}"; strip the
// assignment prefix and the last four characters to leave the JSON text.
$script = $dom->getElementsByTagName('script')->item(1);
if ($script === null) {
    die("could not find the postPreviews script on the bans page");
}
$json = json_decode(
    substr(str_replace("var postPreviews = ", "", $script->nodeValue), 0, -4),
    true
);
if (!is_array($json)) {
    // Undecodable JSON: continue with only the values read from the table.
    $json = array();
}

foreach ($table->getElementsByTagName('td') as $key => $td) {
    // Every sixth cell, starting with the fourth, holds the hover link for a ban
    if ($i % 6 == 3) {
        // data-pid is an attribute attached to links on the 4chan ban page; it is
        // the key of that ban in the JSON taken from the script tag
        $link = $td->firstChild;
        if ($link instanceof DOMElement) {
            $jsonID = $link->getAttribute('data-pid');
            echo $jsonID . "<br/>";
            // Neighbouring cells are reached two siblings at a time
            // (presumably whitespace text nodes sit between cells - TODO confirm)
            $json[$jsonID]["action"] = $td->previousSibling->previousSibling->previousSibling->previousSibling->nodeValue;
            $json[$jsonID]["length"] = $td->previousSibling->previousSibling->nodeValue;
            $json[$jsonID]["reason"] = $td->nextSibling->nextSibling->nodeValue;
            $rowContents[$j++] = $json[$jsonID];
        }
    }
    $i++;
}

echo("<br><br>---------============-------------<br><br>");
// Ledger data retrieve
// $lines is filled later with the entries already stored in the current page file.
$lines = array();

// Read the ledger in one call; file() returns false on failure instead of
// handing back an invalid handle that a feof() loop would spin on.
$all_ledger_data = file("/home4/ecorvid/bans.verniy.xyz/4Chan_Bans_Log-Ledger.txt");
if ($all_ledger_data === false || count($all_ledger_data) < 2) {
    die("could not read 4Chan_Bans_Log-Ledger.txt");
}

// Ledger holds total entries in database followed by the page one filename
$total_entries = trim($all_ledger_data[0]);
$page_one_file = trim($all_ledger_data[1]);
// Get current file data: one stored entry per line of the page file.
$logStore = fopen("/home4/ecorvid/bans.verniy.xyz/Logs/4Chan_Bans_Log-Reverse_Chrono-$page_one_file.json", "r") or die ("could not read 4Chan_Bans_Log-Reverse_Chrono-$page_one_file");
$log_lines = 0;
while (($line = fgets($logStore)) !== false) {
    $line = rtrim($line, "\r\n");

    // The first line of a page starts with the "[" that opens the JSON array.
    if ($log_lines == 0 && strncmp($line, "[", 1) === 0) {
        $line = (string) substr($line, 1);
    }

    // Each line ends with the "," separator, or "]" on the closing line of a
    // full page. Drop it so the line compares equal to a bare json_encode()
    // result. (Writing "" to a string offset, as done before, is a fatal
    // error on PHP >= 7.1.)
    $tail = substr($line, -1);
    if ($tail === "," || $tail === "]") {
        $line = (string) substr($line, 0, -1);
    }

    $lines[$log_lines++] = $line;
}
$line_count = count($lines);
fclose($logStore);
// Process JSON and store in current file.
// $page_entries is the number of lines in the page file currently being written;
// $page_started is true when the next line written is the first of a fresh page
// and therefore has to open the JSON array with "[".
$page_entries = $line_count;
$page_started = false;
if ($line_count <= 1000) {
    $logFile = fopen("/home4/ecorvid/bans.verniy.xyz/Logs/4Chan_Bans_Log-Reverse_Chrono-$page_one_file.json", "a") or die ("could not read 4Chan_Bans_Log-Reverse_Chrono-$page_one_file");
} else {
    // Move into another file due to excess data. The new page starts empty, so
    // reset the fill count; otherwise the loop below would roll over a second
    // time and leave this page empty.
    $page_one_file++;
    $logFile = fopen("/home4/ecorvid/bans.verniy.xyz/Logs/4Chan_Bans_Log-Reverse_Chrono-$page_one_file.json", "w") or die ("could not read 4Chan_Bans_Log-Reverse_Chrono-$page_one_file");
    $page_entries = 0;
    $page_started = true;
}
$new_entries = 0;

require_once("../Class/database-construction.php");
$database = new DatabaseConstruction("../");

// Columns copied from each scraped entry into the "Bans" table.
$ban_fields = array("board", "now", "name", "trip", "com", "time", "md5", "filename", "action", "length", "reason");

// Walk the scraped rows from last to first.
for ($json_lines = count($rowContents) - 1; $json_lines >= 0; $json_lines--) {
    // Table row data
    $entry = $rowContents[$json_lines];
    $logLine = json_encode($entry);

    if ($logLine === false) {
        // Entry cannot be encoded; skip it rather than writing a bare ",".
        $pass = false;
    } else if (in_array($logLine, $lines, true)) {
        // Already stored in the current page.
        echo "||++++++++++++||<br>$logLine<br>FF++++++++++++FF<br>$logLine<br>||++++++++++++||\n<br>---------------------";
        $pass = false;
    } else {
        $pass = true;
    }

    if ($pass) {
        echo("<br><br>XXXXXXXXXXXX<br>" . $logLine . "<br>XXXXXXXXXXXX<br><br>");

        if ($page_entries >= 1000) {
            // Current page is full: continue in the next page file.
            fclose($logFile);
            $page_one_file++;
            $logFile = fopen("/home4/ecorvid/bans.verniy.xyz/Logs/4Chan_Bans_Log-Reverse_Chrono-$page_one_file.json", "a") or die ("could not read 4Chan_Bans_Log-Reverse_Chrono-$page_one_file");
            echo $line_count . " " . $new_entries;
            $line_count = 0;
            $page_entries = 0;
            $page_started = true;
        }

        if ($page_started) {
            $logLine = "[" . $logLine . ",";
            $page_started = false;
        } else if ($page_entries == 999) {
            // Thousandth line of the page closes the JSON array.
            $logLine = $logLine . "]";
        } else {
            $logLine = $logLine . ",";
        }
        $new_entries++;
        $page_entries++;

        // Missing keys are stored as null without raising undefined-index errors.
        $record = array();
        foreach ($ban_fields as $field) {
            $record[$field] = isset($entry[$field]) ? $entry[$field] : null;
        }
        $database->addToTable("Bans", $record);
        fwrite($logFile, $logLine . "\n");
    }
    echo("<br><br>");
}
// Update ledger: first line is the running total of stored entries, second
// line is the page file currently being written.
$leger_data = fopen("/home4/ecorvid/bans.verniy.xyz/4Chan_Bans_Log-Ledger.txt", "w") or die ("could not write 4Chan_Bans_Log-Ledger.txt");
// Update total. The ledger line still carries its newline, so trim and cast
// before adding instead of doing arithmetic on the raw string.
$updated_total = (int) trim($all_ledger_data[0]) + $new_entries;
fwrite($leger_data, $updated_total . "\n");
fwrite($leger_data, $page_one_file . "\n");
fclose($leger_data);
fclose($logFile);