# 30
# loading...
# This website uses cookies to deliver a better user experience
import socket
# Fetch the site root over a raw TCP socket using a minimal HTTP/1.0 request
# and echo the whole response (headers and body) to stdout.
mysock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
# The request line needs a path and the "HTTP/1.0" version token; the blank
# line (second \r\n) terminates the header section.
cmd = "GET http://data.pr4e.org/ HTTP/1.0\r\n\r\n"
try:
    mysock.connect(("data.pr4e.org", 80))
    # Sockets transmit bytes, so the request text is encoded before sending.
    mysock.sendall(cmd.encode())
    # Loop through received data until recv() returns an empty bytes object,
    # which indicates the server has closed the connection.
    while True:
        data = mysock.recv(512)
        if not data:
            break
        # A 512-byte chunk may end in the middle of a multi-byte character;
        # errors="replace" keeps the loop from crashing on such a boundary.
        print(data.decode(errors="replace"), end='')
finally:
    # Release the socket even if connecting, sending or receiving fails.
    mysock.close()
import urllib.request, urllib.parse, urllib.error
# Download the cover image and save its raw bytes to image.jpg.
# urllib.request is a module, not a callable: the fetch goes through urlopen().
with urllib.request.urlopen("http://data.pr4e.org/cover3.jpg") as response:
    data = response.read()
# Binary mode writes the bytes unchanged; the with-block closes the file
# even if the write fails.
with open("image.jpg", "wb") as fhand:
    fhand.write(data)
from markdownify import markdownify as md
import urllib.request, urllib.parse, urllib.error
import ssl
from bs4 import BeautifulSoup
# Fetch a page, strip presentational attributes and non-content tags,
# print the cleaned HTML and save a Markdown conversion named after the title.
url = input("Enter URL: ")

# Ignore SSL certificate errors.
# NOTE: this disables certificate verification, so the server's identity is
# not checked; acceptable for a scraping exercise, not for sensitive data.
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

try:
    # The context must be passed explicitly; otherwise the relaxed settings
    # configured above are never applied to the request.
    with urllib.request.urlopen(url, context=ctx) as response:
        html = response.read()
except Exception:
    # Catch Exception rather than using a bare except so that Ctrl-C and
    # SystemExit still propagate.
    print("Error opening URL")
    # Exit with a non-zero status without depending on the site-provided exit().
    raise SystemExit(1)

soup = BeautifulSoup(html, "html.parser")

# Remove the lengthy class, id, name and inline style attributes.
for tag in soup():
    for attribute in ["class", "id", "name", "style"]:
        del tag[attribute]

# Remove the tags in the list below.
for tag in soup(["style", "script", "sidebar", "aside"]):
    tag.decompose()

# Render once and reuse the result for both the console and the conversion.
prettiified_html = soup.prettify()
print(prettiified_html)
markdownified_html = md(prettiified_html)

# Pages without a <title> (or with an empty one) fall back to a fixed name;
# path separators are replaced so the title cannot point into another directory.
raw_title = soup.title.string if soup.title is not None else None
page_title = (raw_title or "").strip().replace("/", "_").replace("\\", "_")
if not page_title:
    page_title = "untitled"

# Explicit UTF-8 so non-ASCII page content is written the same way on every
# platform; the with-block closes the file even if the write fails.
with open("{}.md".format(page_title), "w", encoding="utf-8") as fhand:
    fhand.write(markdownified_html)