diff options
| author | grothedev <grothedev@gmail.com> | 2022-04-02 15:23:02 -0500 |
|---|---|---|
| committer | grothedev <grothedev@gmail.com> | 2022-04-02 15:23:02 -0500 |
| commit | 6861091f1ff212ebd2b44761c63015b327cb0761 (patch) | |
| tree | cb45ac1c285806dce92428b49a7b500d4052e706 | |
| parent | 7517de1aacdcba774be800f5d8d95934da4759aa (diff) | |
added video dl functionality
| -rw-r--r-- | 4chan_search/wwwimgpull.py | 12 |
1 files changed, 12 insertions, 0 deletions
diff --git a/4chan_search/wwwimgpull.py b/4chan_search/wwwimgpull.py
index f8eb52c..18325d3 100644
--- a/4chan_search/wwwimgpull.py
+++ b/4chan_search/wwwimgpull.py
@@ -1,3 +1,5 @@
+#!/bin/python2
+
 import requests
 from bs4 import BeautifulSoup
 import re
@@ -22,6 +24,15 @@ def pull4chImgs(url):
         result.append(url)
     return result
 
+def pullVids(url):
+    result = []
+    resp = requests.get(url)
+    html = BeautifulSoup(resp.text, 'html.parser')
+    for a in html.find_all('a'):
+        if '.webm' in a.get('href'):
+            result.append(a.get('href'))
+    return result
+
 def pullImgs(url):
     result = []
     resp = requests.get(url)
@@ -64,3 +75,4 @@ def pullPDFs(url, depth=0, alreadycrawled=[]):
 #for url in pull4chImgs(sys.argv[1]):
 # print(url)
 
+
