Sections
Timeline
Sub-Sections
Last Change
Annotate
Revision Log
Download
Plain Text
Original Format
Metanav
Preferences
About Trac
Links
Slowchop Studios
Gerald Kaszuba
Advertisement

root/pygooglecalc/pygooglecalc.py

Revision 27, 1.4 kB (checked in by gak, 11 months ago)

small fixes

  • Property svn:keywords set to Id Revision
Line 
1import os
2import urllib
3
4from BeautifulSoup import BeautifulSoup as bs
5
def download_page(definition):
    """Description: Fetches the Google search results page for a query.

    The query is URL-quoted and requested from www.google.com.au by running
    ``wget`` quietly with an empty user-agent string.

    Inputs: definition --> search query string
    Outputs: the raw page body that wget wrote to its standard output
             (empty if wget produced no output)
    Raises: OSError if the wget executable cannot be started
    """
    # Local import so this function does not depend on a file-level import.
    import subprocess

    url = 'http://www.google.com.au/search?q=%s' % urllib.quote(definition)
    # Pass an argument list (no shell) so the URL is never re-parsed by a
    # shell, and stream the page to stdout ('-O-') instead of a fixed scratch
    # file in the current directory that concurrent calls would overwrite.
    wget = subprocess.Popen(
        ['wget', '-q', '--user-agent=', '-O-', url],
        stdout=subprocess.PIPE)
    page, _ = wget.communicate()
    return page
10
def get_definition(definition):
    """Description: Looks up a query and extracts the highlighted answer.

    Downloads the results page with download_page(), parses it, and takes
    the <b> element nested in the <font> of the first <h2 class="r">
    element. The markup of that element is stripped with strip_ml_tags().

    Inputs: definition --> search query string
    Outputs: the answer text with tags removed, or None when the page has
             no matching <h2 class="r"> / <font> / <b> structure
    """
    data = download_page(definition)
    soup = bs(data)
    heading = soup.find('h2', {'class': 'r'})
    if not heading:
        return None
    try:
        answer = heading.font.b
    except AttributeError:
        # heading.font was None, so there is no <font> child to look inside.
        return None
    if answer is None:
        # <font> exists but has no <b>; without this check str(None) would
        # make the function return the literal string 'None'.
        return None
    return strip_ml_tags(str(answer))
23
def strip_ml_tags(in_text):
    """Description: Removes all HTML/XML-like tags from the input text.

    A tag is everything from a left-angle bracket up to and including the
    next right-angle bracket. A left-angle bracket that is never closed is
    not a tag; it and the text after it are kept unchanged.

    Inputs: in_text --> string of text
    Outputs: text string without the tags

    # doctest unit testing framework

    >>> test_text = "Keep this Text <remove><me /> KEEP </remove> 123"
    >>> strip_ml_tags(test_text)
    'Keep this Text  KEEP  123'
    >>> strip_ml_tags("a < b")
    'a < b'
    """
    kept = []
    pos = 0
    while True:
        start = in_text.find('<', pos)
        if start == -1:
            break
        end = in_text.find('>', start + 1)
        if end == -1:
            # Unterminated '<': nothing further can be a tag, so keep the
            # remainder verbatim instead of running off the end of the text.
            break
        # Keep the text before the tag, then skip past the closing bracket.
        kept.append(in_text[pos:start])
        pos = end + 1
    kept.append(in_text[pos:])
    return ''.join(kept)
55
Note: See TracBrowser for help on using the browser.