Wednesday, May 1, 2013

Simple Mapper Class for NDB on App Engine

This class is based on the db mapper found in the remote_api article, but uses ndb. Its purpose is to let you iterate through a lot of entities when there is not enough time to do it within a single request. The library helps you map over all the entities of a given kind.

You should use this in cases like deleting users who requested deletion, or updating counters for specific filters.

Here is the NDB version of the Mapper. I have added a few improvements that I have used in the past, such as using memcache to keep track of running tasks. This is limited to one mapper at a time; if you have different filters you can add them as a different key. It also works in conjunction with naming my tasks differently and catching the duplicate task error to avoid running the same mapper twice.


import datetime
import logging
from google.appengine.api import memcache
from google.appengine.ext import deferred, ndb
from google.appengine.runtime import DeadlineExceededError


class Mapper(object):
    """Walk every entity matched by a query, resuming in a new task on deadline.

    Subclasses configure ``KIND`` (and optionally ``FILTERS`` / ``ORDERS``)
    in ``init()`` and override ``map()`` to process one entity at a time.
    Inside ``map()`` call ``update(entity)`` / ``delete(entity)`` to queue
    writes; they are flushed every ``batch_size`` entities.

    While a run is in progress a memcache entry keyed by
    ``prefix_key + <class name>`` holds the start time of the run.
    """
    prefix_key = 'mapper_'
    # Lifetime, in seconds, of the memcache "running" marker.
    lock_seconds = 60 * 10

    def __init__(self, use_cache=False):
        """Set the ndb cache policy and reset all mapper state.

        Args:
            use_cache: passed to the ndb context's ``set_cache_policy``; when
                false the context cache is also cleared.
        """
        ctx = ndb.get_context()
        ctx.set_cache_policy(use_cache)
        if not use_cache:
            ctx.clear_cache()

        self.KIND = None
        self.to_put = []
        self.to_delete = []
        # Set to True from map() to stop iterating after the current entity.
        self.terminate = False
        # Data you wanna carry on in case of error
        self.DATA = None
        # Temporary Data that won't carry on in case of error
        self.TMP_DATA = None
        self.FILTERS = []
        self.ORDERS = []
        # implement init for different initializations
        self.init()

    def delete(self, entity):
        """Queue ``entity`` (by key) for deletion in the next batch write."""
        self.to_delete.append(entity.key)

    def update(self, entity):
        """Queue ``entity`` to be put in the next batch write."""
        self.to_put.append(entity)

    def map(self, entity):
        """Map a single entity"""

    def init(self):
        """Hook: initialize KIND, FILTERS, ORDERS and any other variables."""
        pass

    def deadline_error(self):
        """Hook: called on a deadline error, before the run is re-queued."""
        pass

    def finish(self):
        """Called when the mapper has finished, to allow for any final work to be done."""
        pass

    def get_query(self):
        """Returns a query over the specified kind, with any appropriate filters applied."""
        query = self.KIND.query()
        for query_filter in self.FILTERS:
            query = query.filter(query_filter)
        for order in self.ORDERS:
            query = query.order(order)
        return query

    def run(self, batch_size=100, initial_data=None):
        """Starts the mapper running.

        Args:
            batch_size: number of mapped entities between batch writes.
            initial_data: value for ``self.DATA``; when None the current
                ``self.DATA`` is kept.
        """
        if initial_data is None:
            initial_data = self.DATA
        # Optional hook a subclass may define; absent on the base class.
        pre_run_hook = getattr(self, '_pre_run_hook', None)
        if pre_run_hook is not None:
            pre_run_hook()

        self._continue(None, batch_size, initial_data)

    def _batch_write(self):
        """Writes updates and deletes entities in a batch."""
        if self.to_put:
            ndb.put_multi(self.to_put)
            del self.to_put[:]
        if self.to_delete:
            ndb.delete_multi(self.to_delete)
            del self.to_delete[:]

    def _continue(self, cursor, batch_size, data):
        """Process entities from ``cursor`` onward until done or out of time.

        On ``DeadlineExceededError`` the pending writes are flushed and this
        method is re-queued through ``deferred`` with the iterator's cursor,
        so the run carries on in a new task.

        Args:
            cursor: query cursor to resume from, or None to start over.
            batch_size: number of mapped entities between batch writes.
            data: value stored on ``self.DATA`` and carried across re-queues.
        """
        self.DATA = data
        query = self.get_query()
        if query is None:
            self.finish()
            return
        # If we're resuming, pick up where we left off last time.
        results = query.iter(produce_cursors=True, start_cursor=cursor)
        cache_id = self.prefix_key + self.__class__.__name__
        # Keep updating records until we run out of time.
        try:
            marked_at = datetime.datetime.now()
            memcache.set(cache_id, marked_at, self.lock_seconds)
            processed = 0
            while results.has_next():
                entity = results.next()
                self.map(entity)
                processed += 1
                # Do updates and deletes in batches.
                if processed % batch_size == 0:
                    self._batch_write()
                    # Refresh the marker once half of its lifetime is gone so
                    # it never expires while the run is still in progress.
                    age = (datetime.datetime.now() - marked_at).total_seconds()
                    if age * 2 >= self.lock_seconds:
                        marked_at = datetime.datetime.now()
                        memcache.set(cache_id, marked_at, self.lock_seconds)
                if self.terminate:
                    break

            self._batch_write()
        except DeadlineExceededError:
            # Write any unfinished updates to the datastore.
            self._batch_write()
            self.deadline_error()
            # Queue a new task to pick up where we left off.
            deferred.defer(self._continue, results.cursor_after(), batch_size, self.DATA)
            logging.error(self.__class__.__name__ + ' DeadlineExceedError')
            return
        self.finish()
        memcache.delete(cache_id)


Then here is a sample usage:
from google.appengine.ext.ndb import blobstore

class DeleteUser(Mapper):
    """Mapper that removes users whose ``deleted`` date has passed."""

    def init(self):
        """Select User entities whose ``deleted`` value is not in the future."""
        self.KIND = User
        # A generic property is used because User was an expando model: the
        # `deleted` date was added on the deletion request, set in the future
        # so the user had enough time to undelete.
        cutoff_filter = ndb.GenericProperty('deleted') <= datetime.datetime.now()
        self.FILTERS = [cutoff_filter]

    def map(self, user):
        """Delete the user's photos and comments, then queue the user itself."""
        # Sample usage why you want to run this in a mapper
        blobstore.delete_multi(user.photos)
        # Comments are removed in mini batches of 100 keys.
        pending = []
        comment_keys = Comment.query(Comment.user == user.key).iter(keys_only=True)
        for comment_key in comment_keys:
            pending.append(comment_key)
            if len(pending) < 100:
                continue
            ndb.delete_multi(pending)
            pending = []
        # Flush whatever is left over from the last partial batch.
        ndb.delete_multi(pending)
        # The more work you do here, the smaller the mapper batch should be,
        # to avoid having to duplicate runs on a failure.
        self.delete(user)



You can use this on both frontend and backend instances; the 10 minute limit should be handled automatically, and the mapper continues from the last successful batch. To run it, use the deferred library, or if you will run it from cron just create a handler that simply runs it:
# Option 1: run it directly inside a request handler.
deleteUser = DeleteUser()
deleteUser.run(1)  # batch size of 1, since map() does a lot of work per user

# Option 2: run it through the deferred library (a convenience wrapper
# around the task queue, for anyone not familiar with it).
from google.appengine.ext import deferred
# Keyword arguments starting with _ are for the taskqueue API; everything
# before them is passed to your method.
deferred.defer(deleteUser.run, 1, _target='backend_name_if_you_want', _name='a_name_to_avoid_dups')
Thursday, April 25, 2013

Summary of my Android Apps

After checking the archive list of my blog I didn't see me sharing my android apps on here. So here it is, I currently have 4 active apps mostly created for myself that I published on google play.

AppLauncher+
This app automatically organizes your apps based on Google Play categories. The reason I built it is that at one time I flashed my firmware a lot, and reorganizing my apps into folders just took too much time. I couldn't find an app simple enough that it would just work and I would never have to touch it again. It has now evolved to have features like:

  • Manual Categorization (had to do it cause of too much demand)
  • Floating launcher (for paid, you can open a folder/assign commands on what it does)
  • Create Shortcut & Folder view on those shortcut (also paid only)
  • Free version basically just gets an organized list with ads! :(

This is a live wallpaper: you can select a static wallpaper and then it will have your borders as a status bar. I did this because I thought it was cool. Judging from the current number of users, it really wasn't. Oh well, I still use it. It can now show the status bar anywhere, and has features like random wallpapers and wallpapers that change depending on your battery level.

Another app that I use for myself, couldn't find one that exists. It basically is an image/file importer from a link. So if you are using an image editor and you choose to open a photo, you can select this app and paste the url and it will download and use it on the editor.

Shows you a random app. That's it, I was bored. You can star for easy access later.

There are few more that I build with a friend at RamenTech.

JSONRPC Server & Client For Python on Google App Engine

Now that Google Cloud Endpoints is around the corner, it will, and probably should, become the standard way of creating web services for any type of client: mobile, desktop, or even your ajax requests. It's still experimental at the time of this writing, and I will not really talk about how to use it since their documentation already has some good examples.

I will be sharing on how and what I've used to create my own web services for android clients I have created and for ajax calls.

I have created my own jsonrpc client/server class for python. My own full implementation of jsonrpc standards. I have included this on my app-engine-starter code with some sample if you run it and click the JSONRPC Demo dropdown. Feel free to use it. It is still a nice simple library to use creating web services.

I will give a quick sample code here on how it's used:


import logging
from google.appengine.ext import webapp, ndb
import jsonrpc


class Calculator():
    """Plain arithmetic class whose methods are exposed through the RPC server."""

    def add(self, a, b):
        """Return ``a + b``."""
        total = a + b
        return total

    def subtract(self, a, b):
        """Return ``a - b``."""
        difference = a - b
        return difference


# Here is the RPC Handler for your calculator
class CalculatorHandler(webapp.RequestHandler):
    """Request handler that serves a Calculator over JSON-RPC."""

    def post(self):
        """Hand the POSTed request to a JSON-RPC server wrapping Calculator."""
        # Just pass an instance of the class you want to expose.
        exposed = Calculator()
        rpc_server = jsonrpc.Server(exposed)
        # Passing the request and response lets the server handle all the
        # necessary headers.
        rpc_server.handle(self.request, self.response)


# Here is the RPC Client for your calculator
# Demonstrating an async & synchronous way
# Although you wouldn't really wanna use it on same server
# this is just demo purposes. (Not true for ajax calls which is included on app-starter demo)
class CalculatorClientHandler(webapp.RequestHandler):
    """Demo page that calls the calculator RPC endpoint both async and sync."""

    def get(self):
        """Fire five async ``add`` calls, one sync call, then write all answers."""
        endpoint = 'http://localhost:8080/rpc/calculator'

        # The async client does not wait for any call to finish. It uses the
        # ndb context, so its calls can be batched with other ndb async calls
        # (also sampled in the blog post about searching google). If the server
        # supports batching you should make use of that; async fetches are
        # especially helpful for RPC calls to a different domain.
        async_client = jsonrpc.ClientAsync(endpoint)
        futures = []
        for i in range(5):
            futures.append(async_client.add(i, 1))

        # A synchronous call is resolved without waiting for the async ones.
        sync_client = jsonrpc.Client(endpoint)
        answer = sync_client.add(1, 2)
        logging.info('We got answer before requests! %s' % answer)

        # Block until every async call has finished, then collect the values.
        ndb.Future.wait_all(futures)
        async_answers = [future.get_result() for future in futures]
        return self.response.write('%s %s' % (answer, async_answers))


# Route table: the JSON-RPC endpoint itself and the demo client page.
app = webapp.WSGIApplication([('/rpc/calculator', CalculatorHandler),
                              ('/calculator', CalculatorClientHandler)],
                             debug=True)

# Wrap the app with ndb.toplevel to make sure all unhandled async tasks
# are finished.
app = ndb.toplevel(app)

This is specifically designed for Google App Engine because the client uses the ndb context for asynchronous calls. The server should work normally in any other environment. It shouldn't be hard to change the client to work with a normal tasklet; it's just a simple replacement of the library used for urlfetch. This is helpful because if you use a lot of async calls with ndb, you take advantage of its auto-batch feature, which tries to group all possible requests into as few network hops as possible.

Here is a direct link if you just want the jsonrpc.py

An update based on Rober King's suggestion: it would be more convenient to create a base ApiHandler that you can simply extend, so you don't have to pass along session variables or anything else you set up in the request scope. Here is a way to do it with the current jsonrpc module.
class ApiHandler(webapp.RequestHandler):
    """Base handler that exposes its own methods through the JSON-RPC server.

    Usually this should really be extending your base handler.
    """

    def post(self):
        """Serve the POSTed request with a JSON-RPC server wrapping this handler."""
        rpc_server = jsonrpc.Server(self)
        rpc_server.handle(self.request, self.response)

# Now you directly put all your methods in the handler
class CalculatorHandler(ApiHandler):
    """Calculator whose methods live directly on the handler."""

    def add(self, a, b):
        """Return ``a + b``."""
        total = a + b
        return total

    def subtract(self, a, b):
        """Return ``a - b``."""
        difference = a - b
        return difference
Tuesday, April 23, 2013

NDB Caching Queries Tips & Best Practice - Google App Engine

If you are creating a read-heavy App Engine app that lists or queries a lot of entities, it's a good idea to cache those queries so you don't get charged for the reads. But you also want the results to be up to date, without having to worry about invalidation.

Here are some of the things I've done for caching queries. They can't be applied to every case, but they should work for most and can be implemented in the same manner for more complex queries.

The idea is to have an updated field on the entities you are filtering by, so you can use that field as part of your cache key.

Here is some sample code that shows how to display a user's posts with cached queries.


from google.appengine.ext import ndb

class User(ndb.Model):
    """A user, carrying a running total of the comments they have posted."""
    # Unindexed timestamps: `created` uses auto_now_add, `updated` uses auto_now.
    created = ndb.DateTimeProperty(auto_now_add=True, indexed=False)
    updated = ndb.DateTimeProperty(auto_now=True, indexed=False)

    email = ndb.StringProperty()
    # It's always good to keep a total of everything if you are displaying it
    total_comments = ndb.IntegerProperty(default=0, indexed=False)


class Comment(ndb.Model):
    """A comment written by a user, with a memcache-backed per-user listing."""
    created = ndb.DateTimeProperty(auto_now_add=True, indexed=False)
    updated = ndb.DateTimeProperty(auto_now=True, indexed=False)

    user = ndb.KeyProperty(required=True)
    message = ndb.TextProperty()

    @classmethod
    @ndb.transactional(xg=True)
    def post_comment(cls, user, message):
        """Store a new comment and bump the user's comment total in one transaction.

        Putting ``user`` also refreshes its ``updated`` field (auto_now), which
        is part of the cache key built by ``get_by_user``.

        Args:
            user: the User entity posting the comment.
            message: the comment text.
        """
        # NOTE(review): `user` is read by the caller, outside this transaction,
        # so concurrent posts may lose an increment - confirm whether the
        # entity should be re-fetched here.
        user.total_comments += 1
        comment = cls(user=user.key, message=message)
        ndb.put_multi([user, comment])

    @classmethod
    def get_by_user(cls, user, cursor=None):
        """Return one page (up to 20) of the user's comments, cached by keys.

        Args:
            user: the User entity whose comments are listed.
            cursor: urlsafe cursor string of the page to fetch, or None for
                the first page.

        Returns:
            A ``(result, cursor, more)`` tuple as produced by ``fetch_page``.
        """
        ctx = ndb.get_context()
        # every new comment you add a total and updated field so the cache invalidates instantly
        cache_id = 'get_by_user_%s_%s_%s' % (user.key.urlsafe(), user.updated, cursor)
        cached = ctx.memcache_get(cache_id).get_result()

        if cached:
            keys, cursor, more = cached
            # This is your decision if you want to cache keys only
            # it's helpful in cases that you have a single page with that value
            # it means that you cache less and more efficiently
            # Entities deleted since the keys were cached come back as None.
            result = [entity for entity in ndb.get_multi(keys) if entity is not None]
        else:
            qry = cls.query(cls.user == user.key)
            start_cursor = ndb.Cursor(urlsafe=cursor) if cursor else None
            result, cursor, more = qry.fetch_page(20, start_cursor=start_cursor)
            # cache keys only again your decision, you can cache the whole thing if it's not important
            # expiration is not needed if it's this simple
            ctx.memcache_set(cache_id, ([entity.key for entity in result], cursor, more))

        return result, cursor, more
Friday, December 14, 2012

How to Build Live Wallpaper with Canvas on Android

This will be a very basic canvas live wallpaper for Android, just to get people started.
Read the comments in the code to understand how everything works.

AndroidManifest.xml

<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
          package="org.altlimit.samplelivewallpaper"
          android:versionCode="1"
          android:versionName="1.0">
    <uses-sdk android:minSdkVersion="15"/>
    <application android:label="@string/app_name" android:icon="@drawable/ic_launcher">
        <activity android:name="MyActivity"
                  android:label="@string/app_name">
            <intent-filter>
                <action android:name="android.intent.action.MAIN"/>
                <category android:name="android.intent.category.LAUNCHER"/>
            </intent-filter>
        </activity>
        <!--The service that will be drawing your wallpaper-->
        <service android:name=".SampleService"
                 android:label="@string/app_name"
                 android:permission="android.permission.BIND_WALLPAPER"
                 android:icon="@drawable/ic_launcher">

            <intent-filter>
                <action android:name="android.service.wallpaper.WallpaperService" />
            </intent-filter>
            <!--To tell your wallpaper which settings activity it will launch-->
            <meta-data android:name="android.service.wallpaper"
                       android:resource="@xml/meta" />

        </service>
        <!--The activity declaration settings for your wallpaper-->
        <activity android:label="Settings"
                  android:name=".SampleSettings"
                  android:exported="true"
                  android:icon="@drawable/ic_launcher">
        </activity>

    </application>

    <uses-feature
            android:name="android.software.live_wallpaper"
            android:required="true" >
    </uses-feature>

</manifest>

res/xml/settings.xml - this is a normal preference activity
<?xml version="1.0" encoding="utf-8"?>
<PreferenceScreen xmlns:android="http://schemas.android.com/apk/res/android"
                  android:title="Sample Live Wallpaper Settings"
                  android:key="samplelive_wallpaper_settings">

    <PreferenceCategory
            android:title="Settings">

        <CheckBoxPreference
                android:key="pref_sample_checkbox"
                android:defaultValue="false"
                android:title="Sample Checkbox"
                />
        <EditTextPreference
                android:key="pref_count"
                android:defaultValue="10"
                android:numeric="integer"
                android:title="Write Me This amount"
                />
        <EditTextPreference
                android:key="pref_text"
                android:defaultValue="Hello World"
                android:title="What to write"
                />
    </PreferenceCategory>

</PreferenceScreen>


SampleSettings.java - normal preference activity
package org.altlimit.samplelivewallpaper;

import android.content.SharedPreferences;
import android.os.Bundle;
import android.preference.PreferenceActivity;

public class SampleSettings extends PreferenceActivity implements SharedPreferences.OnSharedPreferenceChangeListener {

    /**
     * Builds the settings screen from the preference resource R.xml.settings.
     */
    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        addPreferencesFromResource(R.xml.settings);
    }

    /**
     * Listener callback required by the interface; this sample does nothing here.
     */
    @Override
    public void onSharedPreferenceChanged(SharedPreferences preferences, String key) {
        // Intentionally empty.
    }
}


MyActivity.java
package org.altlimit.samplelivewallpaper;

import android.app.Activity;
import android.app.WallpaperManager;
import android.content.Intent;
import android.os.Bundle;

public class MyActivity extends Activity {

    /**
     * Opens the live wallpaper picker as soon as the app is started.
     */
    @Override
    public void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.main);
        startActivity(new Intent(WallpaperManager.ACTION_LIVE_WALLPAPER_CHOOSER));
    }

    /**
     * Finishes this activity whenever it is resumed.
     */
    @Override
    protected void onResume() {
        super.onResume();
        finish();
    }
}


SampleService.java
package org.altlimit.samplelivewallpaper;

import android.content.SharedPreferences;
import android.graphics.Canvas;
import android.graphics.Color;
import android.graphics.Paint;
import android.os.Handler;
import android.preference.PreferenceManager;
import android.service.wallpaper.WallpaperService;
import android.text.Layout;
import android.text.StaticLayout;
import android.text.TextPaint;
import android.util.Log;
import android.view.SurfaceHolder;

public class SampleService extends WallpaperService {

    /**
     * A wallpaper service is actually just another android service.
     * You just need to implement the onCreateEngine to get started and return your engine
     */
    @Override
    public Engine onCreateEngine() {
        return new SampleEngine();
    }

    /**
     * Engine that paints a white background and writes the configured text
     * the configured number of times, redrawing periodically while visible.
     */
    class SampleEngine extends Engine {
        /** Delay between redraws while visible; raise it if your content changes rarely. */
        private static final long REDRAW_DELAY_MS = 500;

        /**
         * Create variables for your settings
         */
        private boolean checkedSettings = false;
        private String textToWrite = null;
        private int timesToWrite = 0;
        private boolean isVisible = false;

        // Allocated once and reused on every frame.
        private Paint paint = new Paint();
        private StaticLayout staticLayout;
        private Handler redrawHandler = new Handler();
        private Runnable redrawRunnable = new Runnable() {
            @Override
            public void run() {
                draw();
            }
        };

        /** Parses the stored repeat count, falling back to 0 when it is missing or not a number. */
        private int parseCount(String raw) {
            if (raw == null) {
                return 0;
            }
            try {
                return Integer.parseInt(raw.trim());
            } catch (NumberFormatException e) {
                return 0;
            }
        }

        // Here is where all the drawing stuff happens
        private void draw() {
            SurfaceHolder holder = getSurfaceHolder();
            Canvas c = null;
            try {
                c = holder.lockCanvas();
                if (c != null) {
                    // Resets your canvas to a white surface
                    c.drawColor(Color.WHITE);

                    if (textToWrite != null && timesToWrite > 0) {
                        int cW = c.getWidth();
                        int cH = c.getHeight();

                        StringBuilder multipleText = new StringBuilder();
                        for (int i = 0; i < timesToWrite; i++) {
                            multipleText.append(' ').append(textToWrite);
                        }

                        paint.setColor(Color.BLACK);
                        staticLayout = new StaticLayout(multipleText.toString(), new TextPaint(paint), cW - 10, Layout.Alignment.ALIGN_NORMAL, 1.0f, 0.0f, false);
                        // save() must be paired with the restore() below; calling
                        // restore() without a prior save() is an error.
                        c.save();
                        // Move top left where to write the text
                        c.translate(10, cH / 3);
                        staticLayout.draw(c);

                        // Undo the translate in case you want to write more stuff to canvas
                        c.restore();
                    }

                }
            } finally {
                if (c != null)
                    holder.unlockCanvasAndPost(c);
            }
            // Clear anything already queued to avoid multiple draws.
            redrawHandler.removeCallbacks(redrawRunnable);
            // If you have changing data then here is how you redraw
            if (isVisible) {
                redrawHandler.postDelayed(redrawRunnable, REDRAW_DELAY_MS);
            }
        }

        @Override
        public void onVisibilityChanged(boolean visible) {
            super.onVisibilityChanged(visible);
            isVisible = visible;
            // Called when your wallpaper is viewed or not so load settings if it is to show changes instantly
            if (isVisible) {
                final SharedPreferences preference = PreferenceManager.getDefaultSharedPreferences(getApplicationContext());
                checkedSettings = preference.getBoolean("pref_sample_checkbox", false);
                textToWrite = preference.getString("pref_text", null);
                timesToWrite = parseCount(preference.getString("pref_count", "0"));
                // Now we update the canvas
                draw();
            } else {
                // Hidden: drop the queued redraw so nothing is drawn off screen.
                redrawHandler.removeCallbacks(redrawRunnable);
            }
        }

        @Override
        public void onDestroy() {
            // Make sure no redraw fires after the engine is gone.
            isVisible = false;
            redrawHandler.removeCallbacks(redrawRunnable);
            super.onDestroy();
        }
    }
}


That's the basic foundation for creating a live wallpaper with canvas. You can implement more methods on the engine and the service to have more control, but this gives you a complete starting point. Download the full project here
Friday, December 7, 2012

Join Query on Google App Engine Datastore

App Engine Datastore is a NoSQL database, which means you cannot run standard SQL queries, although basic queries are available through GQL. It is very reliable, does not slow down even with terabytes of data, and has a nice indexing mechanism for fetching data.

You can use Google Cloud SQL if you need a relational database; it's a MySQL database managed by Google. At the time I'm writing this, they support up to 100GB per MySQL database, but it is still subject to the limitations of standard MySQL, along with its coolness.

So if you really can't avoid joining entities, for example when you want to show the name of a user in a list, I'll show you some samples. This will all be using Python 2.7 and ndb.


from google.appengine.ext import ndb
# just a sample model for how to efficiently join them
class User(ndb.Model):
    """Sample user model with a display name and a photo."""
    name = ndb.StringProperty()
    photo = ndb.BlobKeyProperty()
    # Always create a text version of your blobkey and store the result of
    # images.get_serving_url(blob) in it one time, if your app allows it.
    photo_path = ndb.TextProperty()

class Page(ndb.Model):
    """Sample page model; comments point at it through their `page` key."""
    data = ndb.TextProperty()

class Comment(ndb.Model):
    """A comment left by a user on a page."""
    user = ndb.KeyProperty()
    page = ndb.KeyProperty()
    created = ndb.DateTimeProperty(auto_now_add=True)
    message = ndb.TextProperty()

    @classmethod
    @ndb.tasklet
    def get_comment_async(cls, comment):
        """Return a future for the comment as a dict with its user joined in.

        Call it as ``Comment.get_comment_async(comment)``.

        Args:
            comment: the Comment entity to expand.

        Returns:
            A future whose result is ``comment.to_dict()`` with ``'user'``
            replaced by ``{'name': ..., 'photo': ...}``, or by None when the
            comment has no user key or the user entity no longer exists.
        """
        result = comment.to_dict()
        author = None
        if comment.user is not None:
            # Yielding lets ndb batch this get with those of other tasklets.
            author = yield comment.user.get_async()
        if author is None:
            result['user'] = None
        else:
            result['user'] = {'name': author.name, 'photo': author.photo_path}
        raise ndb.Return(result)

# on your handler
class MainHandler(webapp.RequestHandler):
    """Shows a page's comments, newest first, each joined with its user."""

    def get(self):
        """Load the page named by the ``id`` request parameter and its comments."""
        # NOTE(review): request.get returns a string; if Page uses integer ids
        # this needs int(...) - confirm against how pages are created.
        page = Page.get_by_id(self.request.get('id'))
        if page is None:
            self.error(404)
            return
        # now we query comments of the page
        comments = (Comment.query(Comment.page == page.key)
                    .order(-Comment.created)
                    .fetch())
        # another good use of tasklet is to load fields info asynchronously:
        # start every lookup first, without waiting on any of them
        futures = [Comment.get_comment_async(comment) for comment in comments]
        # ndb will try to batch what it can so it only does few network hops
        ndb.Future.wait_all(futures)
        # pass your results to your view
        view_data = {'comments': [future.get_result() for future in futures]}
Thursday, December 6, 2012

Web Scraping with Google App Engine

Here is a quick tutorial on how you can scrape Google search results asynchronously with App Engine and cache the results in memcache. You should not use this directly because you can get blocked by Google; it is just a sample of how to scrape web pages, feeds, XML, etc.

But if you do want to do something like this, I recommend adding delays and acting more like a human in your scrapes. I believe that is against their TOS, though.

I added the use of async here for people who don't know how to use them yet so they can learn in the process. The code below is a complete working google search scraper, read the code comments to understand everything.

This is all done with python 2.7 with ndb

app.yaml
application: your-application-id
version: 1
runtime: python27
api_version: 1
threadsafe: true

handlers:
- url: /.*
  script: main.app

libraries:
- name: lxml
  version: latest
main.py
import urllib
from urlparse import urlparse, parse_qs
from google.appengine.ext import webapp, ndb
from lxml import html

# make the function an ndb.tasklet so you don't need to wait for each search
@ndb.tasklet
def search_google_async(keyword):
    """Fetch the Google results page for ``keyword`` and return its result links.

    ndb has all the async methods of memcache & urlfetch and tries to auto
    batch everything behind the scenes. Results are cached in memcache under
    the search url.

    If you don't know yield, you should read up on it a bit (google yield and
    generators with python). Simple explanation: your function will stop at
    each yield, all the pending operations are done in batches, then it
    continues on with the next yields.

    Args:
        keyword: the search phrase.

    Returns:
        A future whose result is a list with one entry per result anchor;
        each entry is the list of ``q`` values parsed from that anchor's href.
        The list is empty when the response status is not 200.
    """
    ctx = ndb.get_context()
    url = 'http://www.google.com/search?' + urllib.urlencode({'q': keyword})

    # check first if you already cached the results
    cache = yield ctx.memcache_get(url)
    # Compare with None so a cached empty result is not fetched again.
    if cache is not None:
        # tasklets return by raising an exception, so to convert a normal
        # function to its async counterpart you just add yield before any
        # async calls, then change return to raise
        raise ndb.Return(cache)

    # we use async method of urlfetch from ndb context
    response = yield ctx.urlfetch(url)

    links = []
    if response.status_code == 200:
        # use the lxml library to convert the string to dom
        dom = html.fromstring(response.content)
        # use a css selector to get all anchor tags; since google put all the
        # results like /url?q=..., you can probably do a[href^=/url?q=] on the
        # css selector instead
        for anchor in dom.cssselect('a'):
            # get its href attribute (None for anchors without one)
            link = anchor.get('href')
            if not link or not link.startswith('/url?q='):
                continue
            # we get the query string q= you can do this however you want
            # it stores the url of the results
            parsed_url = urlparse('http://www.google.com' + link)
            # parse_qs drops blank values, so q may be absent
            targets = parse_qs(parsed_url.query).get('q')
            if targets:
                links.append(targets)

        # now we set the results in memcache with url key and value of list of
        # links; you can remove yield here and batch all of it later since we
        # have app = ndb.toplevel(app), meaning it will not terminate until
        # all async methods are finished
        yield ctx.memcache_set(url, links)

    # we return the links of result
    raise ndb.Return(links)

class MainHandler(webapp.RequestHandler):
    """Runs a few sample searches concurrently and writes out their links."""

    def get(self):
        """Start all searches, wait for them together, then write the results."""
        keywords = [
            'how to make pizza',
            'where can i buy a dog',
            'how big is the grand canyon'
        ]
        # Calling an ndb.tasklet returns a future, so we collect them all and
        # let ndb handle the batching with as few calls as possible.
        futures = [search_google_async(keyword) for keyword in keywords]
        # so here is where everything waits for the results
        ndb.Future.wait_all(futures)
        # .get_result() is the value you raised/returned in your tasklet
        all_links = [future.get_result() for future in futures]
        self.response.out.write(all_links)


# Route table: every search demo request goes to MainHandler at the root url.
app = webapp.WSGIApplication([('/', MainHandler)],
                             debug=True)

# Wrap the app with ndb.toplevel to make sure all unhandled async tasks
# are finished.
app = ndb.toplevel(app)