diff options
| -rw-r--r-- | 4chan_search/wwwimgpull.py | 21 |
1 files changed, 21 insertions, 0 deletions
diff --git a/4chan_search/wwwimgpull.py b/4chan_search/wwwimgpull.py
index f8eb52c..18325d3 100644
--- a/4chan_search/wwwimgpull.py
+++ b/4chan_search/wwwimgpull.py
@@ -1,3 +1,5 @@
+#!/bin/python2
+
 import requests
 from bs4 import BeautifulSoup
 import re
@@ -22,6 +24,24 @@ def pull4chImgs(url):
             result.append(url)
     return result
 
+def pullVids(url):
+    """Return the href of every <a> on the page at `url` that contains '.webm'.
+
+    Fetches `url` with requests.get and parses the response text with
+    BeautifulSoup's 'html.parser'. Each matching href is returned exactly
+    as it appears in the page; duplicates are not removed.
+    """
+    result = []
+    resp = requests.get(url)
+    html = BeautifulSoup(resp.text, 'html.parser')
+    # href=True skips anchors that have no href attribute; a.get('href')
+    # is None for those and `'.webm' in None` raises TypeError.
+    for a in html.find_all('a', href=True):
+        href = a['href']
+        if '.webm' in href:
+            result.append(href)
+    return result
+
 def pullImgs(url):
     result = []
     resp = requests.get(url)
@@ -64,3 +84,4 @@ def pullPDFs(url, depth=0, alreadycrawled=[]):
 
 #for url in pull4chImgs(sys.argv[1]):
 #    print(url)
+
