#!/usr/bin/python
# -*- coding: ISO-8859-15 -*-
import os
import urllib2
import sys
import logging
import logging.handlers
from multiprocessing import Pool
from multiprocessing import TimeoutError
from multiprocessing import Manager

# Manager-backed dict proxy holding the crawl results.
# Keys are URLs; each value is True when the query string was found in that
# page's content (written by process_url, read by the __main__ section).
# NOTE(review): created at import time and presumably reaches the pool
# workers by inheritance (fork) — confirm before running on a platform that
# spawns fresh interpreters for worker processes.
processed_urls = Manager().dict()

# Set up the logger; INFO level so the progress messages are emitted.
logger = logging.getLogger('WebLogger')
logger.setLevel(logging.INFO)

# Set up the handler (a StreamHandler created without a stream argument
# writes to sys.stderr).
handler = logging.StreamHandler()
logger.addHandler(handler)

def get_page(url):
    """Return the content of the page at *url* and the links it contains.

    Placeholder implementation: *url* is not used yet, and the result is
    always an empty content string paired with a new, empty list of links.
    """
    content = ''
    links = []
    return content, links

def process_url(query, url):
    """Process one page, then recursively process the pages it links to.

    Stores in the shared ``processed_urls`` mapping whether ``query`` occurs
    in the content returned by ``get_page(url)``, then handles every link of
    that page the same way.

    Nothing is recorded when ``url`` is already present in
    ``processed_urls`` or when 100 URLs have already been recorded. A
    ``TimeoutError`` raised while fetching the page is logged and the page
    is skipped.

    :param query: text looked up in the page content (``query in content``).
    :param url: address of the page to process.
    :return: None; results are stored in ``processed_urls``.
    """
    logger.info('Processing %s', url)

    # Crawl budget: stop once 100 URLs have been recorded.
    if len(processed_urls) >= 100:
        return
    # Skip pages that were already recorded; this also ends link cycles.
    if url in processed_urls:
        return

    try:
        content, links = get_page(url)
    except TimeoutError:
        # Best effort: a page that times out is skipped, but not silently.
        logger.warning('Timed out while fetching %s', url)
        return

    # Record the page before recursing so pages linking back here are skipped.
    processed_urls[url] = query in content
    for link in links:
        # Recurse on the link, not on ``url``: ``url`` is already recorded,
        # so passing it again would return immediately.
        process_url(query, link)

def launch_work(query, urls):
    """Run ``process_url(query, url)`` for every url on a 4-process pool.

    Every URL is submitted asynchronously, then each submission is waited on
    in order; ``get()`` re-raises an exception raised by the corresponding
    call. The pool is closed and joined whether or not an error occurred.

    :param query: value forwarded to ``process_url`` as its first argument.
    :param urls: iterable of URLs, one task is submitted per item.
    """
    logger.info('Launching process')
    workers = Pool(4)
    try:
        # Submit everything first so the tasks can run concurrently.
        pending = []
        for target in urls:
            pending.append(workers.apply_async(process_url, (query, target)))
        # Then block until each task has finished.
        for task in pending:
            task.get()
    finally:
        workers.close()
        workers.join()
    logger.info('Done.')

# Seed URLs from which the crawl starts.
URLS = ['http://python.org',]

if __name__ == '__main__':
    # The query is a required command-line argument: exit with a usage
    # message instead of an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit('Usage: %s QUERY' % sys.argv[0])

    launch_work(sys.argv[1], URLS)

    # Print only the URLs whose content contained the query.
    for url, found in processed_urls.items():
        if found:
            print(url)


