'''data2.py

   Jed Yang, 2016-11-12

   Fetch a file over the Internet.
'''

import urllib.request

url = 'http://cs.carleton.edu/faculty/jyang/'

# Open the page inside a 'with' block so that the network connection is
# closed automatically once we are done reading (even if an error occurs).
with urllib.request.urlopen(url) as page:
   # Looping over the response yields one line of raw bytes at a time and
   # stops by itself when end-of-file is reached.
   for raw_line in page:
      line = raw_line.decode('utf-8')     # convert bytes to str
      if 'office hours' in line.lower():  # case-insensitive search
         print(line, end='')  # line already has its own '\n' at the end

# Task 1.  Get rid of the 'office hours' test logic so as to print every line.
# Try to make sure it is working properly.

# Task 2.  Fetch this file instead:
#     http://cs.carleton.edu/faculty/jyang/cs111/src/gene-data.txt

# I have a biologist friend working at National Institutes of Health (NIH), who
# sent me this file and asked me to help extract certain data from the file.
# (True story.)

# I decided to show you the actual data, so it is a bit long.  When you run
#     python3 data2.py
# you will see thousands of lines fly by you.  5947 lines, to be exact.  You
# can save this output by doing this:
#     python3 data2.py > gene-data.txt
# This syntax has nothing to do with Python, actually.  It is a command-line
# syntax that gives us a handy way to save any program output as a text file.

# If you want to make sure the file is sound, you can run
#     sha1sum gene-data.txt
# to see the contents of the file reduced to an alphanumeric string:
#     352b98bf5383abd589f41bafe4067de03bbad108  gene-data.txt
# Think of this as a 'signature' of the file.  If you change even one letter,
# the sha1sum will change completely.  So this method is often used to check the
# integrity of a downloaded file.

# If you are having trouble getting the file via this complicated Python
# method (which I wanted to teach you), you can also download it via your web
# browser.

# Make sure you have a copy of gene-data.txt in the same directory and move on
# to data3.py.