summaryrefslogtreecommitdiff
path: root/search4c_oldhb.py
diff options
context:
space:
mode:
authorgrothedev <grothedev@gmail.com>2025-10-04 00:07:38 -0400
committergrothedev <grothedev@gmail.com>2025-10-04 00:07:38 -0400
commita4f793ff77c4f5891f86d888eafccdc0e6a579e6 (patch)
tree63df9293189fd0105bdd2e8eca774931f0fafa06 /search4c_oldhb.py
Diffstat (limited to 'search4c_oldhb.py')
-rwxr-xr-xsearch4c_oldhb.py65
1 files changed, 65 insertions, 0 deletions
diff --git a/search4c_oldhb.py b/search4c_oldhb.py
new file mode 100755
index 0000000..bed39b4
--- /dev/null
+++ b/search4c_oldhb.py
@@ -0,0 +1,65 @@
+#!/usr/bin/python3
+
+import json
+import requests
+import sqlite3
+import sys
+from wwwimgpull import *
+
+
+def processCatalog(catalog, b):
+
+ for i in range(0, len(catalog)): #each page of the board
+ for j in range(0, len(catalog[i]['threads'])): #each OP on the page
+ if not 'com' in catalog[i]['threads'][j]:
+ continue
+ url = "https://boards.4channel.org/"+b+"/thread/"+str(catalog[i]['threads'][j]['no'])
+ if wod == "*" or wod == "" or wod.lower() in catalog[i]['threads'][j]['com'].lower():
+ results_url.append(url)
+ results_content.append(catalog[i]['threads'][j]['com'])
+ for imgurl in pull4chImgs(url):
+ results_img.append(imgurl)
+ if not 'last_replies' in catalog[i]['threads'][j]:
+ continue
+ for k in range(0, len(catalog[i]['threads'][j]['last_replies'])): #each comment on the OP
+ r = catalog[i]['threads'][j]['last_replies'][k]
+ if not 'com' in r:
+ continue
+ if wod.lower() in r['com'].lower():
+ results_url.append(url+"#p"+str(catalog[i]['threads'][j]['last_replies'][k]['no']))
+ results_content.append(catalog[i]['threads'][j]['last_replies'][k]['com'])
+ #imgs were already retrieved from OP grab
+
+bods = ['a', 'c', 'w', 'm', 'cgl', 'cm', 'f', 'n', 'jp', 'vp', 'v', 'vg', 'vr', 'co', 'g', 'tv', 'k', 'o', 'an', 'tg', 'sp', 'asp', 'sci', 'int', 'out', 'toy', 'biz', 'i', 'po', 'p', 'ck', 'ic', 'wg', 'mu', 'fa', '3', 'gd', 'diy', 'wsg', 's', 'trv', 'fit', 'x', 'lit', 'adv', 'lgbt', 'mlp', 'b', 'r', 'r9k', 'pol', 'soc', 's4s']
+abods = ['hc', 'hm', 'h', 'e', 'u', 'd', 'y', 't', 'hr', 'gif']
+
+
+#####################################################################
+
+
+if len(sys.argv) < 2 or sys.argv[1][0] == '-':
+ print('This program will give you the URLs of all 4chan posts that contain the given search word, either on the entire site or on a select board. You can also use wildcard as search word for all posts of board/site')
+ print('you must provide a search word, or \"*\" for any word.')
+ print('Usage: ./query.py <searchword> [board]')
+ sys.exit(0)
+
+wod = sys.argv[1] #the search keyword
+bod = ''
+if len(sys.argv) > 2:
+ bod = sys.argv[2] #the board if interest, if given
+
+results_url = [] #URLs of threads containing the keyword
+results_content = [] #text of all posts and comments containing the keyword
+results_img = [] #URLs of all images containing the keyword
+
+if bod == '':
+ for b in bods:
+ res = requests.get("https://a.4cdn.org/"+b+"/catalog.json")
+ processCatalog(json.loads(res.text), b)
+else:
+ res = requests.get("https://a.4cdn.org/"+bod+"/catalog.json")
+ processCatalog(json.loads(res.text), bod)
+
+
+for url in results_url:
+ print(url)