In [2]:
import socket
# Fetch a small text file over a raw TCP socket using a hand-written HTTP/1.0
# request. The with-block closes the socket even if connect/send/recv raises.
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as mysock:
    mysock.connect(('data.pr4e.org', 80))
    cmd = 'GET http://data.pr4e.org/intro-short.txt HTTP/1.0\r\n\r\n'.encode()
    # sendall() keeps writing until the whole request is sent; send() may stop early.
    mysock.sendall(cmd)
    chunks = []
    while True:
        data = mysock.recv(512)
        if len(data) < 1:
            break  # an empty read means the peer closed the connection
        chunks.append(data)
# Decode once, after the full response has arrived, so a multi-byte character
# that straddles a 512-byte boundary cannot raise UnicodeDecodeError.
print(b''.join(chunks).decode(), end='')
HTTP/1.1 200 OK Date: Wed, 06 Jul 2022 12:47:25 GMT Server: Apache/2.4.18 (Ubuntu) Last-Modified: Sat, 13 May 2017 11:22:22 GMT ETag: "1d3-54f6609240717" Accept-Ranges: bytes Content-Length: 467 Cache-Control: max-age=0, no-cache, no-store, must-revalidate Pragma: no-cache Expires: Wed, 11 Jan 1984 05:00:00 GMT Connection: close Content-Type: text/plain Why should you learn to write programs? Writing programs (or programming) is a very creative and rewarding activity. You can write programs for many reasons, ranging from making your living to solving a difficult data analysis problem to having fun to helping someone else solve a problem. This book assumes that everyone needs to know how to program, and that once you know how to program you will figure out what you want to do with your newfound skills.
In [18]:
# To run this, download the BeautifulSoup zip file
# http://www.py4e.com/code3/bs4.zip
# and unzip it in the same directory as this file
import urllib.request, urllib.parse, urllib.error
from bs4 import BeautifulSoup
import ssl
# Build a TLS context with certificate checks switched off.
# NOTE: this accepts any certificate -- acceptable for a demo, unsafe for real traffic.
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

# Ask for the page address, download it and parse the markup.
url = input('Enter - ')
response = urllib.request.urlopen(url, context=ctx)
html = response.read()
soup = BeautifulSoup(html, 'html.parser')

# Print the href attribute of every anchor tag (None when an anchor has no href).
tags = soup.find_all('a')
for tag in tags:
    print(tag.get('href'))
https://www.python.org/ download.html https://docs.python.org/3.12/ https://docs.python.org/3.11/ https://docs.python.org/3.10/ https://docs.python.org/3.9/ https://docs.python.org/3.8/ https://docs.python.org/3.7/ https://docs.python.org/3.6/ https://docs.python.org/3.5/ https://docs.python.org/2.7/ https://www.python.org/doc/versions/ https://www.python.org/dev/peps/ https://wiki.python.org/moin/BeginnersGuide https://wiki.python.org/moin/PythonBooks https://www.python.org/doc/av/ https://devguide.python.org/ genindex.html py-modindex.html https://www.python.org/ # whatsnew/3.10.html whatsnew/index.html tutorial/index.html library/index.html reference/index.html using/index.html howto/index.html installing/index.html distributing/index.html extending/index.html c-api/index.html faq/index.html py-modindex.html genindex.html glossary.html search.html contents.html bugs.html https://devguide.python.org/docquality/#helping-with-documentation about.html license.html copyright.html download.html https://docs.python.org/3.12/ https://docs.python.org/3.11/ https://docs.python.org/3.10/ https://docs.python.org/3.9/ https://docs.python.org/3.8/ https://docs.python.org/3.7/ https://docs.python.org/3.6/ https://docs.python.org/3.5/ https://docs.python.org/2.7/ https://www.python.org/doc/versions/ https://www.python.org/dev/peps/ https://wiki.python.org/moin/BeginnersGuide https://wiki.python.org/moin/PythonBooks https://www.python.org/doc/av/ https://devguide.python.org/ genindex.html py-modindex.html https://www.python.org/ # copyright.html /license.html https://www.python.org/psf/donations/ /bugs.html https://www.sphinx-doc.org/
In [22]:
# Convert an all-caps heading to title case and show the result.
text = "UNIVERSITY OF ST. THOMAS LAW JOURNAL"
print(str.title(text))
University Of St. Thomas Law Journal
In [2]:
import socket
# Fetch romeo.txt over a raw TCP socket using a hand-written HTTP/1.0 request.
# The with-block closes the socket even if connect/send/recv raises.
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as mysock:
    mysock.connect(('data.pr4e.org', 80))
    cmd = 'GET http://data.pr4e.org/romeo.txt HTTP/1.0\r\n\r\n'.encode()
    # sendall() keeps writing until the whole request is sent; send() may stop early.
    mysock.sendall(cmd)
    chunks = []
    while True:
        data = mysock.recv(512)
        if len(data) < 1:
            break  # an empty read means the peer closed the connection
        chunks.append(data)
# Print the response in one piece. Printing each 512-byte chunk with the default
# newline inserted a line break wherever a chunk happened to end, splitting words
# in the middle; decoding once also avoids cutting a multi-byte character in two.
print(b''.join(chunks).decode(), end='')
HTTP/1.1 200 OK Date: Fri, 08 Jul 2022 03:15:02 GMT Server: Apache/2.4.18 (Ubuntu) Last-Modified: Sat, 13 May 2017 11:22:22 GMT ETag: "a7-54f6609245537" Accept-Ranges: bytes Content-Length: 167 Cache-Control: max-age=0, no-cache, no-store, must-revalidate Pragma: no-cache Expires: Wed, 11 Jan 1984 05:00:00 GMT Connection: close Content-Type: text/plain But soft what light through yonder window breaks It is the east and Juliet is the sun Arise fair sun and kill the envious moon Who is already s ick and pale with grief
In [3]:
import urllib.request, urllib.parse, urllib.error
# Stream the file line by line; the with-block closes the HTTP response
# when the loop finishes or if an error interrupts it.
with urllib.request.urlopen('http://data.pr4e.org/romeo.txt') as fhand:
    for line in fhand:
        # Lines arrive as bytes: decode to str and trim surrounding whitespace.
        print(line.decode().strip())
But soft what light through yonder window breaks It is the east and Juliet is the sun Arise fair sun and kill the envious moon Who is already sick and pale with grief
In [4]:
import urllib.request, urllib.parse, urllib.error
# Count how many times each whitespace-separated word occurs in the file.
counts = dict()
# The with-block closes the HTTP response once every line has been read.
with urllib.request.urlopen('http://data.pr4e.org/romeo.txt') as fhand:
    for line in fhand:
        # Lines arrive as bytes: decode to str before splitting into words.
        words = line.decode().split()
        for word in words:
            # get() supplies 0 the first time a word is seen.
            counts[word] = counts.get(word, 0) + 1
print(counts)
{'But': 1, 'soft': 1, 'what': 1, 'light': 1, 'through': 1, 'yonder': 1, 'window': 1, 'breaks': 1, 'It': 1, 'is': 3, 'the': 3, 'east': 1, 'and': 3, 'Juliet': 1, 'sun': 2, 'Arise': 1, 'fair': 1, 'kill': 1, 'envious': 1, 'moon': 1, 'Who': 1, 'already': 1, 'sick': 1, 'pale': 1, 'with': 1, 'grief': 1}
In [5]:
import urllib.request, urllib.parse, urllib.error
# Print the raw HTML of the page line by line; the with-block closes the
# HTTP response when the loop finishes or if an error interrupts it.
with urllib.request.urlopen('http://www.dr-chuck.com/page1.htm') as fhand:
    for line in fhand:
        # Lines arrive as bytes: decode to str and trim surrounding whitespace.
        print(line.decode().strip())
<h1>The First Page</h1> <p> If you like, you can switch to the <a href="http://www.dr-chuck.com/page2.htm"> Second Page</a>. </p>
In [ ]: