summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- 4chan_search/wwwimgpull.py 29
-rw-r--r-- update-hosts.service 4
-rwxr-xr-x update_hosts 7
3 files changed, 35 insertions, 5 deletions
diff --git a/4chan_search/wwwimgpull.py b/4chan_search/wwwimgpull.py
index 6d7ab73..f8eb52c 100644
--- a/4chan_search/wwwimgpull.py
+++ b/4chan_search/wwwimgpull.py
@@ -2,6 +2,7 @@ import requests
from bs4 import BeautifulSoup
import re
import sys
+import time
###
# get images from websites
@@ -30,6 +31,34 @@ def pullImgs(url):
result.append(srcURL)
return result
+#def pullPDFs(url):
+# return pullPDFs(url, 0)
+
+def pullPDFs(url, depth=0, alreadycrawled=None):
+    """Crawl same-site links starting at ``url`` and collect links to PDFs.
+
+    Fetches ``url``, walks every ``<a href>`` on the page, records links whose
+    path ends in ``.pdf`` and recurses into every other link on the same host,
+    sleeping 5 seconds before each recursive fetch.
+
+    Args:
+        url: Absolute http(s) URL of the page to crawl.
+        depth: Current recursion depth; crawling stops once it exceeds 5.
+        alreadycrawled: List of page URLs visited so far. It is appended to
+            in place and shared down the recursion. A fresh list is created
+            when omitted, so separate top-level calls do not share state.
+
+    Returns:
+        A list of absolute PDF URLs. It may contain duplicates when several
+        pages link to the same file.
+    """
+    # Local import: the module's top-level imports do not include urllib.
+    from urllib.parse import urldefrag, urljoin, urlparse
+
+    if alreadycrawled is None:
+        alreadycrawled = []
+    if depth > 5 or not url or url in alreadycrawled:
+        return []
+    result = []
+    print(url)
+    resp = requests.get(url)
+    html = BeautifulSoup(resp.text, 'html.parser')
+    alreadycrawled.append(url)
+    site = urlparse(url).netloc
+    for a in html.find_all('a'):
+        href = a.get('href')
+        if href is None or href == '' or href == '/':
+            continue
+        # Resolve relative links against the current page and drop the
+        # fragment so "page#top" is not treated as a different page.
+        link, _ = urldefrag(urljoin(url, href))
+        parsed = urlparse(link)
+        # Skip mailto:/javascript: style links and anything on another host.
+        if parsed.scheme not in ('http', 'https') or parsed.netloc != site:
+            continue
+        print('found ' + href)
+        if parsed.path.lower().endswith('.pdf'):
+            result.append(link)
+        elif link not in alreadycrawled:
+            # Checked before sleeping so known pages do not cost 5 seconds.
+            time.sleep(5)
+            result = result + pullPDFs(link, depth+1, alreadycrawled)
+    return result
+
#if len(sys.argv) < 2:
# sys.exit(0)
diff --git a/update-hosts.service b/update-hosts.service
index 6280cf5..2aa6111 100644
--- a/update-hosts.service
+++ b/update-hosts.service
@@ -1,7 +1,7 @@
[Unit]
Description=updates /etc/hosts with my custom hosts
-Requires=network-online.target
-After=multi-user.target
+Requires=network-connected.target
+After=multi-user.target network-connected.target
[Service]
Type=simple
diff --git a/update_hosts b/update_hosts
index 745eb68..0b3cc5a 100755
--- a/update_hosts
+++ b/update_hosts
@@ -8,9 +8,10 @@ localcheck=`nmap -sn 192.168.1.0/24 | grep debian`
if [[ $localcheck ]]; then
hb_ip=`echo $localcheck | sed 's/.*(\(.*\))/\1/g'`
else
- hb_ip=`ping grothe.ddns.net -c 1 | sed 's/bytes from.*(\(.*\)).*/\1/g'`
+ hb_ip=`ping grothe.ddns.net -c 1 | grep PING | awk '{print $3}' | sed 's/(\|)//g'`
fi
-sed -i 's/'${hb_ip}'.*//g'
-echo '${hb_ip} hb' >> /etc/hosts
+#sed -i 's/'${hb_ip}'.*//g' /etc/hosts
+sed -i 's/.*hb//g' /etc/hosts
+echo "${hb_ip} hb" >> /etc/hosts